This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.



Re: [PATCH] Remove UNSPEC_LOADU and UNSPEC_STOREU


On Sun, Jan 10, 2016 at 11:45 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Sun, Jan 10, 2016 at 11:32 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> Since *mov<mode>_internal and <avx512>_(load|store)<mode>_mask patterns
>> can handle unaligned load and store, we can remove UNSPEC_LOADU and
>> UNSPEC_STOREU.  We use function prototypes with pointer to scalar for
>> unaligned load/store builtin functions so that memory passed to
>> *mov<mode>_internal is unaligned.
>>
>> Tested on x86-64.  Is this OK for trunk in stage 3?
>
> This patch is not appropriate for stage 3.
>
> Uros.
>
>> H.J.
>> ----
>> gcc/
>>
>>         PR target/69201
>>         * config/i386/avx512bwintrin.h (_mm512_mask_loadu_epi16): Pass
>>         const short * to __builtin_ia32_loaddquhi512_mask.
>>         (_mm512_maskz_loadu_epi16): Likewise.
>>         (_mm512_mask_storeu_epi16): Pass short * to
>>         __builtin_ia32_storedquhi512_mask.
>>         (_mm512_mask_loadu_epi8): Pass const char * to
>>         __builtin_ia32_loaddquqi512_mask.
>>         (_mm512_maskz_loadu_epi8): Likewise.
>>         (_mm512_mask_storeu_epi8): Pass char * to
>>         __builtin_ia32_storedquqi512_mask.
>>         * config/i386/avx512fintrin.h (_mm512_loadu_pd): Pass
>>         const double * to __builtin_ia32_loadupd512_mask.
>>         (_mm512_mask_loadu_pd): Likewise.
>>         (_mm512_maskz_loadu_pd): Likewise.
>>         (_mm512_storeu_pd): Pass double * to
>>         __builtin_ia32_storeupd512_mask.
>>         (_mm512_mask_storeu_pd): Likewise.
>>         (_mm512_loadu_ps): Pass const float * to
>>         __builtin_ia32_loadups512_mask.
>>         (_mm512_mask_loadu_ps): Likewise.
>>         (_mm512_maskz_loadu_ps): Likewise.
>>         (_mm512_storeu_ps): Pass float * to
>>         __builtin_ia32_storeups512_mask.
>>         (_mm512_mask_storeu_ps): Likewise.
>>         (_mm512_mask_loadu_epi64): Pass const long long * to
>>         __builtin_ia32_loaddqudi512_mask.
>>         (_mm512_maskz_loadu_epi64): Likewise.
>>         (_mm512_mask_storeu_epi64): Pass long long *
>>         to __builtin_ia32_storedqudi512_mask.
>>         (_mm512_loadu_si512): Pass const int * to
>>         __builtin_ia32_loaddqusi512_mask.
>>         (_mm512_mask_loadu_epi32): Likewise.
>>         (_mm512_maskz_loadu_epi32): Likewise.
>>         (_mm512_storeu_si512): Pass int * to
>>         __builtin_ia32_storedqusi512_mask.
>>         (_mm512_mask_storeu_epi32): Likewise.
>>         * config/i386/avx512vlbwintrin.h (_mm256_mask_storeu_epi8): Pass
>>         char * to __builtin_ia32_storedquqi256_mask.
>>         (_mm_mask_storeu_epi8): Likewise.
>>         (_mm256_mask_loadu_epi16): Pass const short * to
>>         __builtin_ia32_loaddquhi256_mask.
>>         (_mm256_maskz_loadu_epi16): Likewise.
>>         (_mm_mask_loadu_epi16): Pass const short * to
>>         __builtin_ia32_loaddquhi128_mask.
>>         (_mm_maskz_loadu_epi16): Likewise.
>>         (_mm256_mask_loadu_epi8): Pass const char * to
>>         __builtin_ia32_loaddquqi256_mask.
>>         (_mm256_maskz_loadu_epi8): Likewise.
>>         (_mm_mask_loadu_epi8): Pass const char * to
>>         __builtin_ia32_loaddquqi128_mask.
>>         (_mm_maskz_loadu_epi8): Likewise.
>>         (_mm256_mask_storeu_epi16): Pass short * to
>>         __builtin_ia32_storedquhi256_mask.
>>         (_mm_mask_storeu_epi16): Pass short * to
>>         __builtin_ia32_storedquhi128_mask.
>>         * config/i386/avx512vlintrin.h (_mm256_mask_loadu_pd): Pass
>>         const double * to __builtin_ia32_loadupd256_mask.
>>         (_mm256_maskz_loadu_pd): Likewise.
>>         (_mm_mask_loadu_pd): Pass const double * to
>>         __builtin_ia32_loadupd128_mask.
>>         (_mm_maskz_loadu_pd): Likewise.
>>         (_mm256_mask_storeu_pd): Pass double * to
>>         __builtin_ia32_storeupd256_mask.
>>         (_mm_mask_storeu_pd): Pass double * to
>>         __builtin_ia32_storeupd128_mask.
>>         (_mm256_mask_loadu_ps): Pass const float * to
>>         __builtin_ia32_loadups256_mask.
>>         (_mm256_maskz_loadu_ps): Likewise.
>>         (_mm_mask_loadu_ps): Pass const float * to
>>         __builtin_ia32_loadups128_mask.
>>         (_mm_maskz_loadu_ps): Likewise.
>>         (_mm256_mask_storeu_ps): Pass float * to
>>         __builtin_ia32_storeups256_mask.
>>         (_mm_mask_storeu_ps): Pass float * to
>>         __builtin_ia32_storeups128_mask.
>>         (_mm256_mask_loadu_epi64): Pass const long long * to
>>         __builtin_ia32_loaddqudi256_mask.
>>         (_mm256_maskz_loadu_epi64): Likewise.
>>         (_mm_mask_loadu_epi64): Pass const long long * to
>>         __builtin_ia32_loaddqudi128_mask.
>>         (_mm_maskz_loadu_epi64): Likewise.
>>         (_mm256_mask_storeu_epi64): Pass long long * to
>>         __builtin_ia32_storedqudi256_mask.
>>         (_mm_mask_storeu_epi64): Pass long long * to
>>         __builtin_ia32_storedqudi128_mask.
>>         (_mm256_mask_loadu_epi32): Pass const int * to
>>         __builtin_ia32_loaddqusi256_mask.
>>         (_mm256_maskz_loadu_epi32): Likewise.
>>         (_mm_mask_loadu_epi32): Pass const int * to
>>         __builtin_ia32_loaddqusi128_mask.
>>         (_mm_maskz_loadu_epi32): Likewise.
>>         (_mm256_mask_storeu_epi32): Pass int * to
>>         __builtin_ia32_storedqusi256_mask.
>>         (_mm_mask_storeu_epi32): Pass int * to
>>         __builtin_ia32_storedqusi128_mask.
>>         * config/i386/i386-builtin-types.def (PCSHORT): New.
>>         (PINT64): Likewise.
>>         (V64QI_FTYPE_PCCHAR_V64QI_UDI): Likewise.
>>         (V32HI_FTYPE_PCSHORT_V32HI_USI): Likewise.
>>         (V32QI_FTYPE_PCCHAR_V32QI_USI): Likewise.
>>         (V16SF_FTYPE_PCFLOAT_V16SF_UHI): Likewise.
>>         (V8DF_FTYPE_PCDOUBLE_V8DF_UQI): Likewise.
>>         (V16SI_FTYPE_PCINT_V16SI_UHI): Likewise.
>>         (V16HI_FTYPE_PCSHORT_V16HI_UHI): Likewise.
>>         (V16QI_FTYPE_PCCHAR_V16QI_UHI): Likewise.
>>         (V8SF_FTYPE_PCFLOAT_V8SF_UQI): Likewise.
>>         (V8DI_FTYPE_PCINT64_V8DI_UQI): Likewise.
>>         (V8SI_FTYPE_PCINT_V8SI_UQI): Likewise.
>>         (V8HI_FTYPE_PCSHORT_V8HI_UQI): Likewise.
>>         (V4DF_FTYPE_PCDOUBLE_V4DF_UQI): Likewise.
>>         (V4SF_FTYPE_PCFLOAT_V4SF_UQI): Likewise.
>>         (V4DI_FTYPE_PCINT64_V4DI_UQI): Likewise.
>>         (V4SI_FTYPE_PCINT_V4SI_UQI): Likewise.
>>         (V2DF_FTYPE_PCDOUBLE_V2DF_UQI): Likewise.
>>         (V2DI_FTYPE_PCINT64_V2DI_UQI): Likewise.
>>         (VOID_FTYPE_PDOUBLE_V8DF_UQI): Likewise.
>>         (VOID_FTYPE_PDOUBLE_V4DF_UQI): Likewise.
>>         (VOID_FTYPE_PDOUBLE_V2DF_UQI): Likewise.
>>         (VOID_FTYPE_PFLOAT_V16SF_UHI): Likewise.
>>         (VOID_FTYPE_PFLOAT_V8SF_UQI): Likewise.
>>         (VOID_FTYPE_PFLOAT_V4SF_UQI): Likewise.
>>         (VOID_FTYPE_PINT64_V8DI_UQI): Likewise.
>>         (VOID_FTYPE_PINT64_V4DI_UQI): Likewise.
>>         (VOID_FTYPE_PINT64_V2DI_UQI): Likewise.
>>         (VOID_FTYPE_PINT_V16SI_UHI): Likewise.
>>         (VOID_FTYPE_PINT_V8SI_UHI): Likewise.
>>         (VOID_FTYPE_PINT_V4SI_UHI): Likewise.
>>         (VOID_FTYPE_PSHORT_V32HI_USI): Likewise.
>>         (VOID_FTYPE_PSHORT_V16HI_UHI): Likewise.
>>         (VOID_FTYPE_PSHORT_V8HI_UQI): Likewise.
>>         (VOID_FTYPE_PCHAR_V64QI_UDI): Likewise.
>>         (VOID_FTYPE_PCHAR_V32QI_USI): Likewise.
>>         (VOID_FTYPE_PCHAR_V16QI_UHI): Likewise.
>>         (V64QI_FTYPE_PCV64QI_V64QI_UDI): Removed.
>>         (V32HI_FTYPE_PCV32HI_V32HI_USI): Likewise.
>>         (V32QI_FTYPE_PCV32QI_V32QI_USI): Likewise.
>>         (V16HI_FTYPE_PCV16HI_V16HI_UHI): Likewise.
>>         (V16QI_FTYPE_PCV16QI_V16QI_UHI): Likewise.
>>         (V8HI_FTYPE_PCV8HI_V8HI_UQI): Likewise.
>>         (VOID_FTYPE_PV32HI_V32HI_USI): Likewise.
>>         (VOID_FTYPE_PV16HI_V16HI_UHI): Likewise.
>>         (VOID_FTYPE_PV8HI_V8HI_UQI): Likewise.
>>         (VOID_FTYPE_PV64QI_V64QI_UDI): Likewise.
>>         (VOID_FTYPE_PV32QI_V32QI_USI): Likewise.
>>         (VOID_FTYPE_PV16QI_V16QI_UHI): Likewise.
>>         * config/i386/i386.c (ix86_emit_save_reg_using_mov): Don't
>>         use UNSPEC_STOREU.
>>         (ix86_emit_restore_sse_regs_using_mov): Don't use UNSPEC_LOADU.
>>         (ix86_avx256_split_vector_move_misalign): Don't use unaligned
>>         load or store.
>>         (ix86_expand_vector_move_misalign): Likewise.
>>         (bdesc_special_args): Use CODE_FOR_movvNXY_internal and pointer
>>         to scalar function prototype for unaligned load/store builtins.
>>         (ix86_expand_special_args_builtin): Updated.
>>         * config/i386/sse.md (UNSPEC_LOADU): Removed.
>>         (UNSPEC_STOREU): Likewise.
>>         (VI_ULOADSTORE_BW_AVX512VL): Likewise.
>>         (VI_ULOADSTORE_F_AVX512VL): Likewise.
>>         (<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>): Likewise.
>>         (*<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>): Likewise.
>>         (<sse>_storeu<ssemodesuffix><avxsizesuffix>): Likewise.
>>         (<avx512>_storeu<ssemodesuffix><avxsizesuffix>_mask): Likewise.
>>         (<sse2_avx_avx512f>_loaddqu<mode><mask_name>): Likewise.
>>         (*<sse2_avx_avx512f>_loaddqu<mode><mask_name>): Likewise.
>>         (<sse2_avx_avx512f>_storedqu<mode>): Likewise.
>>         (<avx512>_storedqu<mode>_mask): Likewise.
>>         (*sse4_2_pcmpestr_unaligned): Likewise.
>>         (*sse4_2_pcmpistr_unaligned): Likewise.
>>         (*mov<mode>_internal): Renamed to ...
>>         (mov<mode>_internal): This.  Remove check of AVX and IAMCU on
>>         misaligned operand.  Replace vmovdqu64 with vmovdqu<ssescalarsize>.
>>         (movsd/movhpd to movupd peephole): Don't use UNSPEC_LOADU.
>>         (movlpd/movhpd to movupd peephole): Don't use UNSPEC_STOREU.
>>
>> gcc/testsuite/
>>
>>         PR target/69201
>>         * gcc.target/i386/avx256-unaligned-store-1.c (a): Make it
>>         extern to force it misaligned.
>>         (b): Likewise.
>>         (c): Likewise.
>>         (d): Likewise.
>>         Check vmovups.*movv8sf_internal/3 instead of avx_storeups256.
>>         Don't check `*' before movv4sf_internal.
>>         * gcc.target/i386/avx256-unaligned-store-2.c: Check
>>         vmovups.*movv32qi_internal/3 instead of avx_storeups256.
>>         Don't check `*' before movv16qi_internal.
>>         * gcc.target/i386/avx256-unaligned-store-3.c (a): Make it
>>         extern to force it misaligned.
>>         (b): Likewise.
>>         (c): Likewise.
>>         (d): Likewise.
>>         Check vmovups.*movv4df_internal/3 instead of avx_storeupd256.
>>         Don't check `*' before movv2df_internal.
>>         * gcc.target/i386/avx256-unaligned-store-4.c (a): Make it
>>         extern to force it misaligned.
>>         (b): Likewise.
>>         (c): Likewise.
>>         (d): Likewise.
>>         Check movv8sf_internal instead of avx_storeups256.
>>         Check movups.*movv4sf_internal/3 instead of avx_storeups256.


Here is the updated patch for GCC 7.  Tested on x86-64.  OK for trunk?


-- 
H.J.
From e04524c617b91b5f00d083f9d79a71b49957fbda Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Sat, 9 Jan 2016 04:55:18 -0800
Subject: [PATCH] Remove UNSPEC_LOADU and UNSPEC_STOREU

Since the *mov<mode>_internal and <avx512>_(load|store)<mode>_mask patterns
can handle unaligned loads and stores, we can remove UNSPEC_LOADU and
UNSPEC_STOREU.  The unaligned load/store builtin functions now use
prototypes with pointer-to-scalar arguments, so the memory operand passed
to *mov<mode>_internal carries only scalar alignment and is treated as
unaligned.
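
For illustration only (this sketch is not part of the patch): the type of
the builtin's pointer argument is what determines the alignment recorded
on the memory operand.  With a const double * argument the operand
promises only 8-byte alignment, so *mov<mode>_internal must assume a
possibly misaligned access and emits vmovupd, while a dereference through
a vector pointer keeps 64-byte alignment and can become vmovapd.
Assuming -O2 -mavx512f:

#include <immintrin.h>

__m512d
loadu_sketch (const void *p)
{
  /* Expands to __builtin_ia32_loadupd512_mask ((const double *) p, ...);
     the memory operand carries only 8-byte alignment, so the move
     pattern emits an unaligned vmovupd.  */
  return _mm512_loadu_pd (p);
}

__m512d
loada_sketch (const void *p)
{
  /* A plain vector load keeps the 64-byte alignment of __m512d and
     can be emitted as an aligned vmovapd.  */
  return *(const __m512d *) p;
}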

gcc/

	PR target/69201
	* config/i386/avx512bwintrin.h (_mm512_mask_loadu_epi16): Pass
	const short * to __builtin_ia32_loaddquhi512_mask.
	(_mm512_maskz_loadu_epi16): Likewise.
	(_mm512_mask_storeu_epi16): Pass short * to
	__builtin_ia32_storedquhi512_mask.
	(_mm512_mask_loadu_epi8): Pass const char * to
	__builtin_ia32_loaddquqi512_mask.
	(_mm512_maskz_loadu_epi8): Likewise.
	(_mm512_mask_storeu_epi8): Pass char * to
	__builtin_ia32_storedquqi512_mask.
	* config/i386/avx512fintrin.h (_mm512_loadu_pd): Pass
	const double * to __builtin_ia32_loadupd512_mask.
	(_mm512_mask_loadu_pd): Likewise.
	(_mm512_maskz_loadu_pd): Likewise.
	(_mm512_storeu_pd): Pass double * to
	__builtin_ia32_storeupd512_mask.
	(_mm512_mask_storeu_pd): Likewise.
	(_mm512_loadu_ps): Pass const float * to
	__builtin_ia32_loadups512_mask.
	(_mm512_mask_loadu_ps): Likewise.
	(_mm512_maskz_loadu_ps): Likewise.
	(_mm512_storeu_ps): Pass float * to
	__builtin_ia32_storeups512_mask.
	(_mm512_mask_storeu_ps): Likewise.
	(_mm512_mask_loadu_epi64): Pass const long long * to
	__builtin_ia32_loaddqudi512_mask.
	(_mm512_maskz_loadu_epi64): Likewise.
	(_mm512_mask_storeu_epi64): Pass long long *
	to __builtin_ia32_storedqudi512_mask.
	(_mm512_loadu_si512): Pass const int * to
	__builtin_ia32_loaddqusi512_mask.
	(_mm512_mask_loadu_epi32): Likewise.
	(_mm512_maskz_loadu_epi32): Likewise.
	(_mm512_storeu_si512): Pass int * to
	__builtin_ia32_storedqusi512_mask.
	(_mm512_mask_storeu_epi32): Likewise.
	* config/i386/avx512vlbwintrin.h (_mm256_mask_storeu_epi8): Pass
	char * to __builtin_ia32_storedquqi256_mask.
	(_mm_mask_storeu_epi8): Likewise.
	(_mm256_mask_loadu_epi16): Pass const short * to
	__builtin_ia32_loaddquhi256_mask.
	(_mm256_maskz_loadu_epi16): Likewise.
	(_mm_mask_loadu_epi16): Pass const short * to
	__builtin_ia32_loaddquhi128_mask.
	(_mm_maskz_loadu_epi16): Likewise.
	(_mm256_mask_loadu_epi8): Pass const char * to
	__builtin_ia32_loaddquqi256_mask.
	(_mm256_maskz_loadu_epi8): Likewise.
	(_mm_mask_loadu_epi8): Pass const char * to
	__builtin_ia32_loaddquqi128_mask.
	(_mm_maskz_loadu_epi8): Likewise.
	(_mm256_mask_storeu_epi16): Pass short * to
	__builtin_ia32_storedquhi256_mask.
	(_mm_mask_storeu_epi16): Pass short * to
	__builtin_ia32_storedquhi128_mask.
	* config/i386/avx512vlintrin.h (_mm256_mask_loadu_pd): Pass
	const double * to __builtin_ia32_loadupd256_mask.
	(_mm256_maskz_loadu_pd): Likewise.
	(_mm_mask_loadu_pd): Pass const double * to
	__builtin_ia32_loadupd128_mask.
	(_mm_maskz_loadu_pd): Likewise.
	(_mm256_mask_storeu_pd): Pass double * to
	__builtin_ia32_storeupd256_mask.
	(_mm_mask_storeu_pd): Pass double * to
	__builtin_ia32_storeupd128_mask.
	(_mm256_mask_loadu_ps): Pass const float * to
	__builtin_ia32_loadups256_mask.
	(_mm256_maskz_loadu_ps): Likewise.
	(_mm_mask_loadu_ps): Pass const float * to
	__builtin_ia32_loadups128_mask.
	(_mm_maskz_loadu_ps): Likewise.
	(_mm256_mask_storeu_ps): Pass float * to
	__builtin_ia32_storeups256_mask.
	(_mm_mask_storeu_ps): Pass float * to
	__builtin_ia32_storeups128_mask.
	(_mm256_mask_loadu_epi64): Pass const long long * to
	__builtin_ia32_loaddqudi256_mask.
	(_mm256_maskz_loadu_epi64): Likewise.
	(_mm_mask_loadu_epi64): Pass const long long * to
	__builtin_ia32_loaddqudi128_mask.
	(_mm_maskz_loadu_epi64): Likewise.
	(_mm256_mask_storeu_epi64): Pass long long * to
	__builtin_ia32_storedqudi256_mask.
	(_mm_mask_storeu_epi64): Pass long long * to
	__builtin_ia32_storedqudi128_mask.
	(_mm256_mask_loadu_epi32): Pass const int * to
	__builtin_ia32_loaddqusi256_mask.
	(_mm256_maskz_loadu_epi32): Likewise.
	(_mm_mask_loadu_epi32): Pass const int * to
	__builtin_ia32_loaddqusi128_mask.
	(_mm_maskz_loadu_epi32): Likewise.
	(_mm256_mask_storeu_epi32): Pass int * to
	__builtin_ia32_storedqusi256_mask.
	(_mm_mask_storeu_epi32): Pass int * to
	__builtin_ia32_storedqusi128_mask.
	* config/i386/i386-builtin-types.def (PCSHORT): New.
	(PINT64): Likewise.
	(V64QI_FTYPE_PCCHAR_V64QI_UDI): Likewise.
	(V32HI_FTYPE_PCSHORT_V32HI_USI): Likewise.
	(V32QI_FTYPE_PCCHAR_V32QI_USI): Likewise.
	(V16SF_FTYPE_PCFLOAT_V16SF_UHI): Likewise.
	(V8DF_FTYPE_PCDOUBLE_V8DF_UQI): Likewise.
	(V16SI_FTYPE_PCINT_V16SI_UHI): Likewise.
	(V16HI_FTYPE_PCSHORT_V16HI_UHI): Likewise.
	(V16QI_FTYPE_PCCHAR_V16QI_UHI): Likewise.
	(V8SF_FTYPE_PCFLOAT_V8SF_UQI): Likewise.
	(V8DI_FTYPE_PCINT64_V8DI_UQI): Likewise.
	(V8SI_FTYPE_PCINT_V8SI_UQI): Likewise.
	(V8HI_FTYPE_PCSHORT_V8HI_UQI): Likewise.
	(V4DF_FTYPE_PCDOUBLE_V4DF_UQI): Likewise.
	(V4SF_FTYPE_PCFLOAT_V4SF_UQI): Likewise.
	(V4DI_FTYPE_PCINT64_V4DI_UQI): Likewise.
	(V4SI_FTYPE_PCINT_V4SI_UQI): Likewise.
	(V2DF_FTYPE_PCDOUBLE_V2DF_UQI): Likewise.
	(V2DI_FTYPE_PCINT64_V2DI_UQI): Likewise.
	(VOID_FTYPE_PDOUBLE_V8DF_UQI): Likewise.
	(VOID_FTYPE_PDOUBLE_V4DF_UQI): Likewise.
	(VOID_FTYPE_PDOUBLE_V2DF_UQI): Likewise.
	(VOID_FTYPE_PFLOAT_V16SF_UHI): Likewise.
	(VOID_FTYPE_PFLOAT_V8SF_UQI): Likewise.
	(VOID_FTYPE_PFLOAT_V4SF_UQI): Likewise.
	(VOID_FTYPE_PINT64_V8DI_UQI): Likewise.
	(VOID_FTYPE_PINT64_V4DI_UQI): Likewise.
	(VOID_FTYPE_PINT64_V2DI_UQI): Likewise.
	(VOID_FTYPE_PINT_V16SI_UHI): Likewise.
	(VOID_FTYPE_PINT_V8SI_UHI): Likewise.
	(VOID_FTYPE_PINT_V4SI_UHI): Likewise.
	(VOID_FTYPE_PSHORT_V32HI_USI): Likewise.
	(VOID_FTYPE_PSHORT_V16HI_UHI): Likewise.
	(VOID_FTYPE_PSHORT_V8HI_UQI): Likewise.
	(VOID_FTYPE_PCHAR_V64QI_UDI): Likewise.
	(VOID_FTYPE_PCHAR_V32QI_USI): Likewise.
	(VOID_FTYPE_PCHAR_V16QI_UHI): Likewise.
	(V64QI_FTYPE_PCV64QI_V64QI_UDI): Removed.
	(V32HI_FTYPE_PCV32HI_V32HI_USI): Likewise.
	(V32QI_FTYPE_PCV32QI_V32QI_USI): Likewise.
	(V16HI_FTYPE_PCV16HI_V16HI_UHI): Likewise.
	(V16QI_FTYPE_PCV16QI_V16QI_UHI): Likewise.
	(V8HI_FTYPE_PCV8HI_V8HI_UQI): Likewise.
	(VOID_FTYPE_PV32HI_V32HI_USI): Likewise.
	(VOID_FTYPE_PV16HI_V16HI_UHI): Likewise.
	(VOID_FTYPE_PV8HI_V8HI_UQI): Likewise.
	(VOID_FTYPE_PV64QI_V64QI_UDI): Likewise.
	(VOID_FTYPE_PV32QI_V32QI_USI): Likewise.
	(VOID_FTYPE_PV16QI_V16QI_UHI): Likewise.
	* config/i386/i386.c (ix86_emit_save_reg_using_mov): Don't
	use UNSPEC_STOREU.
	(ix86_emit_restore_sse_regs_using_mov): Don't use UNSPEC_LOADU.
	(ix86_avx256_split_vector_move_misalign): Don't use unaligned
	load or store.
	(ix86_expand_vector_move_misalign): Likewise.
	(bdesc_special_args): Use CODE_FOR_movvNXY_internal and pointer
	to scalar function prototype for unaligned load/store builtins.
	(ix86_expand_special_args_builtin): Updated.
	* config/i386/sse.md (UNSPEC_LOADU): Removed.
	(UNSPEC_STOREU): Likewise.
	(VI_ULOADSTORE_BW_AVX512VL): Likewise.
	(VI_ULOADSTORE_F_AVX512VL): Likewise.
	(ssescalarsize): Handle V4TI, V2TI and V1TI.
	(<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>): Removed.
	(*<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>): Likewise.
	(<sse>_storeu<ssemodesuffix><avxsizesuffix>): Likewise.
	(<avx512>_storeu<ssemodesuffix><avxsizesuffix>_mask): Likewise.
	(<sse2_avx_avx512f>_loaddqu<mode><mask_name>): Likewise.
	(*<sse2_avx_avx512f>_loaddqu<mode><mask_name>): Likewise.
	(<sse2_avx_avx512f>_storedqu<mode>): Likewise.
	(<avx512>_storedqu<mode>_mask): Likewise.
	(*sse4_2_pcmpestr_unaligned): Likewise.
	(*sse4_2_pcmpistr_unaligned): Likewise.
	(*mov<mode>_internal): Renamed to ...
	(mov<mode>_internal): This.  Remove check of AVX and IAMCU on
	misaligned operand.  Replace vmovdqu64 with vmovdqu<ssescalarsize>.
	(movsd/movhpd to movupd peephole): Don't use UNSPEC_LOADU.
	(movlpd/movhpd to movupd peephole): Don't use UNSPEC_STOREU.

gcc/testsuite/

	PR target/69201
	* gcc.target/i386/avx256-unaligned-store-1.c (a): Make it
	extern to force it to be misaligned.
	(b): Likewise.
	(c): Likewise.
	(d): Likewise.
	Check vmovups.*movv8sf_internal/3 instead of avx_storeups256.
	Don't check `*' before movv4sf_internal.
	* gcc.target/i386/avx256-unaligned-store-2.c: Check
	vmovups.*movv32qi_internal/3 instead of avx_storeups256.
	Don't check `*' before movv16qi_internal.
	* gcc.target/i386/avx256-unaligned-store-3.c (a): Make it
	extern to force it to be misaligned.
	(b): Likewise.
	(c): Likewise.
	(d): Likewise.
	Check vmovups.*movv4df_internal/3 instead of avx_storeupd256.
	Don't check `*' before movv2df_internal.
	* gcc.target/i386/avx256-unaligned-store-4.c (a): Make it
	extern to force it to be misaligned.
	(b): Likewise.
	(c): Likewise.
	(d): Likewise.
	Check movv8sf_internal instead of avx_storeups256.
	Check movups.*movv4sf_internal/3 instead of avx_storeups256.
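
As a note on the testsuite changes above: an extern declaration only
guarantees the element type's alignment, not the larger alignment the
compiler could assume for arrays it defines itself, so it cannot prove
the 256-bit stores are aligned and the unaligned-store patterns keep
being exercised.  A rough sketch of the shape of these tests
(hypothetical; the actual files differ in details):

#define N 1024

/* extern: the definitions' alignment is unknown in this unit, so the
   vectorizer has to use unaligned 256-bit stores.  */
extern float a[N], b[N+3], c[N], d[N];

void
avx_test (void)
{
  int i;

  for (i = 0; i < N; i++)
    b[i+3] = a[i] * c[i];

  for (i = 0; i < N; i++)
    d[i] = c[i] * 20.0f;
}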
---
 gcc/config/i386/avx512bwintrin.h                   |  12 +-
 gcc/config/i386/avx512fintrin.h                    |  36 +-
 gcc/config/i386/avx512vlbwintrin.h                 |  24 +-
 gcc/config/i386/avx512vlintrin.h                   |  48 +-
 gcc/config/i386/i386-builtin-types.def             |  50 +-
 gcc/config/i386/i386.c                             | 242 +++++-----
 gcc/config/i386/sse.md                             | 504 +--------------------
 .../gcc.target/i386/avx256-unaligned-store-1.c     |   6 +-
 .../gcc.target/i386/avx256-unaligned-store-2.c     |   4 +-
 .../gcc.target/i386/avx256-unaligned-store-3.c     |   6 +-
 .../gcc.target/i386/avx256-unaligned-store-4.c     |   7 +-
 11 files changed, 230 insertions(+), 709 deletions(-)

diff --git a/gcc/config/i386/avx512bwintrin.h b/gcc/config/i386/avx512bwintrin.h
index f40a7d9..e1dafba 100644
--- a/gcc/config/i386/avx512bwintrin.h
+++ b/gcc/config/i386/avx512bwintrin.h
@@ -87,7 +87,7 @@ extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P)
 {
-  return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P,
+  return (__m512i) __builtin_ia32_loaddquhi512_mask ((const short *) __P,
 						     (__v32hi) __W,
 						     (__mmask32) __U);
 }
@@ -96,7 +96,7 @@ extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_loadu_epi16 (__mmask32 __U, void const *__P)
 {
-  return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P,
+  return (__m512i) __builtin_ia32_loaddquhi512_mask ((const short *) __P,
 						     (__v32hi)
 						     _mm512_setzero_hi (),
 						     (__mmask32) __U);
@@ -106,7 +106,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_storeu_epi16 (void *__P, __mmask32 __U, __m512i __A)
 {
-  __builtin_ia32_storedquhi512_mask ((__v32hi *) __P,
+  __builtin_ia32_storedquhi512_mask ((short *) __P,
 				     (__v32hi) __A,
 				     (__mmask32) __U);
 }
@@ -150,7 +150,7 @@ extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P)
 {
-  return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P,
+  return (__m512i) __builtin_ia32_loaddquqi512_mask ((const char *) __P,
 						     (__v64qi) __W,
 						     (__mmask64) __U);
 }
@@ -159,7 +159,7 @@ extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_loadu_epi8 (__mmask64 __U, void const *__P)
 {
-  return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P,
+  return (__m512i) __builtin_ia32_loaddquqi512_mask ((const char *) __P,
 						     (__v64qi)
 						     _mm512_setzero_hi (),
 						     (__mmask64) __U);
@@ -169,7 +169,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_storeu_epi8 (void *__P, __mmask64 __U, __m512i __A)
 {
-  __builtin_ia32_storedquqi512_mask ((__v64qi *) __P,
+  __builtin_ia32_storedquqi512_mask ((char *) __P,
 				     (__v64qi) __A,
 				     (__mmask64) __U);
 }
diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index e009d8c..2f51be9 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -5671,7 +5671,7 @@ extern __inline __m512d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_loadu_pd (void const *__P)
 {
-  return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
+  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
 						   (__v8df)
 						   _mm512_undefined_pd (),
 						   (__mmask8) -1);
@@ -5681,7 +5681,7 @@ extern __inline __m512d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
 {
-  return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
+  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
 						   (__v8df) __W,
 						   (__mmask8) __U);
 }
@@ -5690,7 +5690,7 @@ extern __inline __m512d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
 {
-  return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
+  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
 						   (__v8df)
 						   _mm512_setzero_pd (),
 						   (__mmask8) __U);
@@ -5700,7 +5700,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_storeu_pd (void *__P, __m512d __A)
 {
-  __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A,
+  __builtin_ia32_storeupd512_mask ((double *) __P, (__v8df) __A,
 				   (__mmask8) -1);
 }
 
@@ -5708,7 +5708,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
 {
-  __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A,
+  __builtin_ia32_storeupd512_mask ((double *) __P, (__v8df) __A,
 				   (__mmask8) __U);
 }
 
@@ -5716,7 +5716,7 @@ extern __inline __m512
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_loadu_ps (void const *__P)
 {
-  return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
+  return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
 						  (__v16sf)
 						  _mm512_undefined_ps (),
 						  (__mmask16) -1);
@@ -5726,7 +5726,7 @@ extern __inline __m512
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
 {
-  return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
+  return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
 						  (__v16sf) __W,
 						  (__mmask16) __U);
 }
@@ -5735,7 +5735,7 @@ extern __inline __m512
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
 {
-  return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
+  return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
 						  (__v16sf)
 						  _mm512_setzero_ps (),
 						  (__mmask16) __U);
@@ -5745,7 +5745,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_storeu_ps (void *__P, __m512 __A)
 {
-  __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A,
+  __builtin_ia32_storeups512_mask ((float *) __P, (__v16sf) __A,
 				   (__mmask16) -1);
 }
 
@@ -5753,7 +5753,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
 {
-  __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A,
+  __builtin_ia32_storeups512_mask ((float *) __P, (__v16sf) __A,
 				   (__mmask16) __U);
 }
 
@@ -5761,7 +5761,7 @@ extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
 {
-  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P,
+  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
 						     (__v8di) __W,
 						     (__mmask8) __U);
 }
@@ -5770,7 +5770,7 @@ extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
 {
-  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P,
+  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
 						     (__v8di)
 						     _mm512_setzero_si512 (),
 						     (__mmask8) __U);
@@ -5780,7 +5780,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
 {
-  __builtin_ia32_storedqudi512_mask ((__v8di *) __P, (__v8di) __A,
+  __builtin_ia32_storedqudi512_mask ((long long *) __P, (__v8di) __A,
 				     (__mmask8) __U);
 }
 
@@ -5788,7 +5788,7 @@ extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_loadu_si512 (void const *__P)
 {
-  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
+  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
 						     (__v16si)
 						     _mm512_setzero_si512 (),
 						     (__mmask16) -1);
@@ -5798,7 +5798,7 @@ extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
 {
-  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
+  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
 						     (__v16si) __W,
 						     (__mmask16) __U);
 }
@@ -5807,7 +5807,7 @@ extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
 {
-  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
+  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
 						     (__v16si)
 						     _mm512_setzero_si512 (),
 						     (__mmask16) __U);
@@ -5817,7 +5817,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_storeu_si512 (void *__P, __m512i __A)
 {
-  __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A,
+  __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
 				     (__mmask16) -1);
 }
 
@@ -5825,7 +5825,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
 {
-  __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A,
+  __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
 				     (__mmask16) __U);
 }
 
diff --git a/gcc/config/i386/avx512vlbwintrin.h b/gcc/config/i386/avx512vlbwintrin.h
index f260526..5f3d51c 100644
--- a/gcc/config/i386/avx512vlbwintrin.h
+++ b/gcc/config/i386/avx512vlbwintrin.h
@@ -77,7 +77,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A)
 {
-  __builtin_ia32_storedquqi256_mask ((__v32qi *) __P,
+  __builtin_ia32_storedquqi256_mask ((char *) __P,
 				     (__v32qi) __A,
 				     (__mmask32) __U);
 }
@@ -86,7 +86,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_storeu_epi8 (void *__P, __mmask16 __U, __m128i __A)
 {
-  __builtin_ia32_storedquqi128_mask ((__v16qi *) __P,
+  __builtin_ia32_storedquqi128_mask ((char *) __P,
 				     (__v16qi) __A,
 				     (__mmask16) __U);
 }
@@ -95,7 +95,7 @@ extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_loadu_epi16 (__m256i __W, __mmask16 __U, void const *__P)
 {
-  return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P,
+  return (__m256i) __builtin_ia32_loaddquhi256_mask ((const short *) __P,
 						     (__v16hi) __W,
 						     (__mmask16) __U);
 }
@@ -104,7 +104,7 @@ extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_loadu_epi16 (__mmask16 __U, void const *__P)
 {
-  return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P,
+  return (__m256i) __builtin_ia32_loaddquhi256_mask ((const short *) __P,
 						     (__v16hi)
 						     _mm256_setzero_si256 (),
 						     (__mmask16) __U);
@@ -114,7 +114,7 @@ extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_loadu_epi16 (__m128i __W, __mmask8 __U, void const *__P)
 {
-  return (__m128i) __builtin_ia32_loaddquhi128_mask ((__v8hi *) __P,
+  return (__m128i) __builtin_ia32_loaddquhi128_mask ((const short *) __P,
 						     (__v8hi) __W,
 						     (__mmask8) __U);
 }
@@ -123,7 +123,7 @@ extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_loadu_epi16 (__mmask8 __U, void const *__P)
 {
-  return (__m128i) __builtin_ia32_loaddquhi128_mask ((__v8hi *) __P,
+  return (__m128i) __builtin_ia32_loaddquhi128_mask ((const short *) __P,
 						     (__v8hi)
 						     _mm_setzero_hi (),
 						     (__mmask8) __U);
@@ -172,7 +172,7 @@ extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_loadu_epi8 (__m256i __W, __mmask32 __U, void const *__P)
 {
-  return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P,
+  return (__m256i) __builtin_ia32_loaddquqi256_mask ((const char *) __P,
 						     (__v32qi) __W,
 						     (__mmask32) __U);
 }
@@ -181,7 +181,7 @@ extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_loadu_epi8 (__mmask32 __U, void const *__P)
 {
-  return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P,
+  return (__m256i) __builtin_ia32_loaddquqi256_mask ((const char *) __P,
 						     (__v32qi)
 						     _mm256_setzero_si256 (),
 						     (__mmask32) __U);
@@ -191,7 +191,7 @@ extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_loadu_epi8 (__m128i __W, __mmask16 __U, void const *__P)
 {
-  return (__m128i) __builtin_ia32_loaddquqi128_mask ((__v16qi *) __P,
+  return (__m128i) __builtin_ia32_loaddquqi128_mask ((const char *) __P,
 						     (__v16qi) __W,
 						     (__mmask16) __U);
 }
@@ -200,7 +200,7 @@ extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P)
 {
-  return (__m128i) __builtin_ia32_loaddquqi128_mask ((__v16qi *) __P,
+  return (__m128i) __builtin_ia32_loaddquqi128_mask ((const char *) __P,
 						     (__v16qi)
 						     _mm_setzero_hi (),
 						     (__mmask16) __U);
@@ -3679,7 +3679,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_storeu_epi16 (void *__P, __mmask16 __U, __m256i __A)
 {
-  __builtin_ia32_storedquhi256_mask ((__v16hi *) __P,
+  __builtin_ia32_storedquhi256_mask ((short *) __P,
 				     (__v16hi) __A,
 				     (__mmask16) __U);
 }
@@ -3688,7 +3688,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_storeu_epi16 (void *__P, __mmask8 __U, __m128i __A)
 {
-  __builtin_ia32_storedquhi128_mask ((__v8hi *) __P,
+  __builtin_ia32_storedquhi128_mask ((short *) __P,
 				     (__v8hi) __A,
 				     (__mmask8) __U);
 }
diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h
index d0ffb2b..d59bc6c 100644
--- a/gcc/config/i386/avx512vlintrin.h
+++ b/gcc/config/i386/avx512vlintrin.h
@@ -626,7 +626,7 @@ extern __inline __m256d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
 {
-  return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
+  return (__m256d) __builtin_ia32_loadupd256_mask ((const double *) __P,
 						   (__v4df) __W,
 						   (__mmask8) __U);
 }
@@ -635,7 +635,7 @@ extern __inline __m256d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
 {
-  return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
+  return (__m256d) __builtin_ia32_loadupd256_mask ((const double *) __P,
 						   (__v4df)
 						   _mm256_setzero_pd (),
 						   (__mmask8) __U);
@@ -645,7 +645,7 @@ extern __inline __m128d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
 {
-  return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
+  return (__m128d) __builtin_ia32_loadupd128_mask ((const double *) __P,
 						   (__v2df) __W,
 						   (__mmask8) __U);
 }
@@ -654,7 +654,7 @@ extern __inline __m128d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
 {
-  return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
+  return (__m128d) __builtin_ia32_loadupd128_mask ((const double *) __P,
 						   (__v2df)
 						   _mm_setzero_pd (),
 						   (__mmask8) __U);
@@ -664,7 +664,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
 {
-  __builtin_ia32_storeupd256_mask ((__v4df *) __P,
+  __builtin_ia32_storeupd256_mask ((double *) __P,
 				   (__v4df) __A,
 				   (__mmask8) __U);
 }
@@ -673,7 +673,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
 {
-  __builtin_ia32_storeupd128_mask ((__v2df *) __P,
+  __builtin_ia32_storeupd128_mask ((double *) __P,
 				   (__v2df) __A,
 				   (__mmask8) __U);
 }
@@ -682,7 +682,7 @@ extern __inline __m256
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
 {
-  return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
+  return (__m256) __builtin_ia32_loadups256_mask ((const float *) __P,
 						  (__v8sf) __W,
 						  (__mmask8) __U);
 }
@@ -691,7 +691,7 @@ extern __inline __m256
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
 {
-  return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
+  return (__m256) __builtin_ia32_loadups256_mask ((const float *) __P,
 						  (__v8sf)
 						  _mm256_setzero_ps (),
 						  (__mmask8) __U);
@@ -701,7 +701,7 @@ extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
 {
-  return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
+  return (__m128) __builtin_ia32_loadups128_mask ((const float *) __P,
 						  (__v4sf) __W,
 						  (__mmask8) __U);
 }
@@ -710,7 +710,7 @@ extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
 {
-  return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
+  return (__m128) __builtin_ia32_loadups128_mask ((const float *) __P,
 						  (__v4sf)
 						  _mm_setzero_ps (),
 						  (__mmask8) __U);
@@ -720,7 +720,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
 {
-  __builtin_ia32_storeups256_mask ((__v8sf *) __P,
+  __builtin_ia32_storeups256_mask ((float *) __P,
 				   (__v8sf) __A,
 				   (__mmask8) __U);
 }
@@ -729,7 +729,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
 {
-  __builtin_ia32_storeups128_mask ((__v4sf *) __P,
+  __builtin_ia32_storeups128_mask ((float *) __P,
 				   (__v4sf) __A,
 				   (__mmask8) __U);
 }
@@ -738,7 +738,7 @@ extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
 {
-  return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
+  return (__m256i) __builtin_ia32_loaddqudi256_mask ((const long long *) __P,
 						     (__v4di) __W,
 						     (__mmask8) __U);
 }
@@ -747,7 +747,7 @@ extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
 {
-  return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
+  return (__m256i) __builtin_ia32_loaddqudi256_mask ((const long long *) __P,
 						     (__v4di)
 						     _mm256_setzero_si256 (),
 						     (__mmask8) __U);
@@ -757,7 +757,7 @@ extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
 {
-  return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
+  return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P,
 						     (__v2di) __W,
 						     (__mmask8) __U);
 }
@@ -766,7 +766,7 @@ extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
 {
-  return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
+  return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P,
 						     (__v2di)
 						     _mm_setzero_di (),
 						     (__mmask8) __U);
@@ -776,7 +776,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
 {
-  __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
+  __builtin_ia32_storedqudi256_mask ((long long *) __P,
 				     (__v4di) __A,
 				     (__mmask8) __U);
 }
@@ -785,7 +785,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
 {
-  __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
+  __builtin_ia32_storedqudi128_mask ((long long *) __P,
 				     (__v2di) __A,
 				     (__mmask8) __U);
 }
@@ -794,7 +794,7 @@ extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
 {
-  return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
+  return (__m256i) __builtin_ia32_loaddqusi256_mask ((const int *) __P,
 						     (__v8si) __W,
 						     (__mmask8) __U);
 }
@@ -803,7 +803,7 @@ extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
 {
-  return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
+  return (__m256i) __builtin_ia32_loaddqusi256_mask ((const int *) __P,
 						     (__v8si)
 						     _mm256_setzero_si256 (),
 						     (__mmask8) __U);
@@ -813,7 +813,7 @@ extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
 {
-  return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
+  return (__m128i) __builtin_ia32_loaddqusi128_mask ((const int *) __P,
 						     (__v4si) __W,
 						     (__mmask8) __U);
 }
@@ -822,7 +822,7 @@ extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
 {
-  return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
+  return (__m128i) __builtin_ia32_loaddqusi128_mask ((const int *) __P,
 						     (__v4si)
 						     _mm_setzero_si128 (),
 						     (__mmask8) __U);
@@ -832,7 +832,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
 {
-  __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
+  __builtin_ia32_storedqusi256_mask ((int *) __P,
 				     (__v8si) __A,
 				     (__mmask8) __U);
 }
@@ -841,7 +841,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
 {
-  __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
+  __builtin_ia32_storedqusi128_mask ((int *) __P,
 				     (__v4si) __A,
 				     (__mmask8) __U);
 }
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index b892f08..75d57d9 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -124,6 +124,7 @@ DEF_POINTER_TYPE (PCDOUBLE, DOUBLE, CONST)
 DEF_POINTER_TYPE (PCFLOAT, FLOAT, CONST)
 DEF_POINTER_TYPE (PCINT, INT, CONST)
 DEF_POINTER_TYPE (PCINT64, INT64, CONST)
+DEF_POINTER_TYPE (PCSHORT, SHORT, CONST)
 DEF_POINTER_TYPE (PCHAR, CHAR)
 DEF_POINTER_TYPE (PCVOID, VOID, CONST)
 DEF_POINTER_TYPE (PVOID, VOID)
@@ -132,6 +133,7 @@ DEF_POINTER_TYPE (PFLOAT, FLOAT)
 DEF_POINTER_TYPE (PSHORT, SHORT)
 DEF_POINTER_TYPE (PUSHORT, USHORT)
 DEF_POINTER_TYPE (PINT, INT)
+DEF_POINTER_TYPE (PINT64, INT64)
 DEF_POINTER_TYPE (PLONGLONG, LONGLONG)
 DEF_POINTER_TYPE (PULONGLONG, ULONGLONG)
 DEF_POINTER_TYPE (PUNSIGNED, UNSIGNED)
@@ -754,24 +756,36 @@ DEF_FUNCTION_TYPE (V16HI, V8HI, V16HI, UHI)
 DEF_FUNCTION_TYPE (V16HI, HI, V16HI, UHI)
 DEF_FUNCTION_TYPE (V8HI, V8HI, V8HI, UQI)
 DEF_FUNCTION_TYPE (V8HI, HI, V8HI, UQI)
-DEF_FUNCTION_TYPE (V64QI, PCV64QI, V64QI, UDI)
-DEF_FUNCTION_TYPE (V32HI, PCV32HI, V32HI, USI)
-DEF_FUNCTION_TYPE (V32QI, PCV32QI, V32QI, USI)
 DEF_FUNCTION_TYPE (V16SF, PCV16SF, V16SF, UHI)
 DEF_FUNCTION_TYPE (V8DF, PCV8DF, V8DF, UQI)
 DEF_FUNCTION_TYPE (V16SI, PCV16SI, V16SI, UHI)
-DEF_FUNCTION_TYPE (V16HI, PCV16HI, V16HI, UHI)
-DEF_FUNCTION_TYPE (V16QI, PCV16QI, V16QI, UHI)
 DEF_FUNCTION_TYPE (V8SF, PCV8SF, V8SF, UQI)
 DEF_FUNCTION_TYPE (V8DI, PCV8DI, V8DI, UQI)
 DEF_FUNCTION_TYPE (V8SI, PCV8SI, V8SI, UQI)
-DEF_FUNCTION_TYPE (V8HI, PCV8HI, V8HI, UQI)
 DEF_FUNCTION_TYPE (V4DF, PCV4DF, V4DF, UQI)
 DEF_FUNCTION_TYPE (V4SF, PCV4SF, V4SF, UQI)
 DEF_FUNCTION_TYPE (V4DI, PCV4DI, V4DI, UQI)
 DEF_FUNCTION_TYPE (V4SI, PCV4SI, V4SI, UQI)
 DEF_FUNCTION_TYPE (V2DF, PCV2DF, V2DF, UQI)
 DEF_FUNCTION_TYPE (V2DI, PCV2DI, V2DI, UQI)
+DEF_FUNCTION_TYPE (V64QI, PCCHAR, V64QI, UDI)
+DEF_FUNCTION_TYPE (V32HI, PCSHORT, V32HI, USI)
+DEF_FUNCTION_TYPE (V32QI, PCCHAR, V32QI, USI)
+DEF_FUNCTION_TYPE (V16SF, PCFLOAT, V16SF, UHI)
+DEF_FUNCTION_TYPE (V8DF, PCDOUBLE, V8DF, UQI)
+DEF_FUNCTION_TYPE (V16SI, PCINT, V16SI, UHI)
+DEF_FUNCTION_TYPE (V16HI, PCSHORT, V16HI, UHI)
+DEF_FUNCTION_TYPE (V16QI, PCCHAR, V16QI, UHI)
+DEF_FUNCTION_TYPE (V8SF, PCFLOAT, V8SF, UQI)
+DEF_FUNCTION_TYPE (V8DI, PCINT64, V8DI, UQI)
+DEF_FUNCTION_TYPE (V8SI, PCINT, V8SI, UQI)
+DEF_FUNCTION_TYPE (V8HI, PCSHORT, V8HI, UQI)
+DEF_FUNCTION_TYPE (V4DF, PCDOUBLE, V4DF, UQI)
+DEF_FUNCTION_TYPE (V4SF, PCFLOAT, V4SF, UQI)
+DEF_FUNCTION_TYPE (V4DI, PCINT64, V4DI, UQI)
+DEF_FUNCTION_TYPE (V4SI, PCINT, V4SI, UQI)
+DEF_FUNCTION_TYPE (V2DF, PCDOUBLE, V2DF, UQI)
+DEF_FUNCTION_TYPE (V2DI, PCINT64, V2DI, UQI)
 DEF_FUNCTION_TYPE (V16HI, V16SI, V16HI, UHI)
 DEF_FUNCTION_TYPE (V8SI, V8DI, V8SI, UQI)
 DEF_FUNCTION_TYPE (V8HI, V8DI, V8HI, UQI)
@@ -823,12 +837,24 @@ DEF_FUNCTION_TYPE (VOID, PV16QI, V4DI, UQI)
 DEF_FUNCTION_TYPE (VOID, PV16QI, V2DI, UQI)
 DEF_FUNCTION_TYPE (VOID, PV8SI, V8SI, UQI)
 DEF_FUNCTION_TYPE (VOID, PV4SI, V4SI, UQI)
-DEF_FUNCTION_TYPE (VOID, PV32HI, V32HI, USI)
-DEF_FUNCTION_TYPE (VOID, PV16HI, V16HI, UHI)
-DEF_FUNCTION_TYPE (VOID, PV8HI, V8HI, UQI)
-DEF_FUNCTION_TYPE (VOID, PV64QI, V64QI, UDI)
-DEF_FUNCTION_TYPE (VOID, PV32QI, V32QI, USI)
-DEF_FUNCTION_TYPE (VOID, PV16QI, V16QI, UHI)
+DEF_FUNCTION_TYPE (VOID, PDOUBLE, V8DF, UQI)
+DEF_FUNCTION_TYPE (VOID, PDOUBLE, V4DF, UQI)
+DEF_FUNCTION_TYPE (VOID, PDOUBLE, V2DF, UQI)
+DEF_FUNCTION_TYPE (VOID, PFLOAT, V16SF, UHI)
+DEF_FUNCTION_TYPE (VOID, PFLOAT, V8SF, UQI)
+DEF_FUNCTION_TYPE (VOID, PFLOAT, V4SF, UQI)
+DEF_FUNCTION_TYPE (VOID, PINT64, V8DI, UQI)
+DEF_FUNCTION_TYPE (VOID, PINT64, V4DI, UQI)
+DEF_FUNCTION_TYPE (VOID, PINT64, V2DI, UQI)
+DEF_FUNCTION_TYPE (VOID, PINT, V16SI, UHI)
+DEF_FUNCTION_TYPE (VOID, PINT, V8SI, UQI)
+DEF_FUNCTION_TYPE (VOID, PINT, V4SI, UQI)
+DEF_FUNCTION_TYPE (VOID, PSHORT, V32HI, USI)
+DEF_FUNCTION_TYPE (VOID, PSHORT, V16HI, UHI)
+DEF_FUNCTION_TYPE (VOID, PSHORT, V8HI, UQI)
+DEF_FUNCTION_TYPE (VOID, PCHAR, V64QI, UDI)
+DEF_FUNCTION_TYPE (VOID, PCHAR, V32QI, USI)
+DEF_FUNCTION_TYPE (VOID, PCHAR, V16QI, UHI)
 DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, V8DI, INT, UQI)
 DEF_FUNCTION_TYPE (V8SI, V8SF, V8SI, UQI)
 DEF_FUNCTION_TYPE (V4SI, V4SF, V4SI, UQI)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 3d044e8..4e48572 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -11706,7 +11706,6 @@ ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
 {
   struct machine_function *m = cfun->machine;
   rtx reg = gen_rtx_REG (mode, regno);
-  rtx unspec = NULL_RTX;
   rtx mem, addr, base, insn;
   unsigned int align;
 
@@ -11717,13 +11716,7 @@ ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
   align = MIN (GET_MODE_ALIGNMENT (mode), INCOMING_STACK_BOUNDARY);
   set_mem_align (mem, align);
 
-  /* SSE saves are not within re-aligned local stack frame.
-     In case INCOMING_STACK_BOUNDARY is misaligned, we have
-     to emit unaligned store.  */
-  if (mode == V4SFmode && align < 128)
-    unspec = gen_rtx_UNSPEC (mode, gen_rtvec (1, reg), UNSPEC_STOREU);
-
-  insn = emit_insn (gen_rtx_SET (mem, unspec ? unspec : reg));
+  insn = emit_insn (gen_rtx_SET (mem, reg));
   RTX_FRAME_RELATED_P (insn) = 1;
 
   base = addr;
@@ -11770,8 +11763,6 @@ ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
       mem = gen_rtx_MEM (mode, addr);
       add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
     }
-  else if (unspec)
-    add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
 }
 
 /* Emit code to save registers using MOV insns.
@@ -13323,18 +13314,7 @@ ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
 	/* The location is aligned up to INCOMING_STACK_BOUNDARY.  */
 	align = MIN (GET_MODE_ALIGNMENT (V4SFmode), INCOMING_STACK_BOUNDARY);
 	set_mem_align (mem, align);
-
-	/* SSE saves are not within re-aligned local stack frame.
-	   In case INCOMING_STACK_BOUNDARY is misaligned, we have
-	   to emit unaligned load.  */
-	if (align < 128)
-	  {
-	    rtx unspec = gen_rtx_UNSPEC (V4SFmode, gen_rtvec (1, mem),
-					 UNSPEC_LOADU);
-	    emit_insn (gen_rtx_SET (reg, unspec));
-	  }
-	else
-	  emit_insn (gen_rtx_SET (reg, mem));
+	emit_insn (gen_rtx_SET (reg, mem));
 
 	ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
 
@@ -18838,8 +18818,6 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
 {
   rtx m;
   rtx (*extract) (rtx, rtx, rtx);
-  rtx (*load_unaligned) (rtx, rtx);
-  rtx (*store_unaligned) (rtx, rtx);
   machine_mode mode;
 
   switch (GET_MODE (op0))
@@ -18848,20 +18826,14 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
       gcc_unreachable ();
     case V32QImode:
       extract = gen_avx_vextractf128v32qi;
-      load_unaligned = gen_avx_loaddquv32qi;
-      store_unaligned = gen_avx_storedquv32qi;
       mode = V16QImode;
       break;
     case V8SFmode:
       extract = gen_avx_vextractf128v8sf;
-      load_unaligned = gen_avx_loadups256;
-      store_unaligned = gen_avx_storeups256;
       mode = V4SFmode;
       break;
     case V4DFmode:
       extract = gen_avx_vextractf128v4df;
-      load_unaligned = gen_avx_loadupd256;
-      store_unaligned = gen_avx_storeupd256;
       mode = V2DFmode;
       break;
     }
@@ -18878,14 +18850,8 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
 	  r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
 	  emit_move_insn (op0, r);
 	}
-      /* Normal *mov<mode>_internal pattern will handle
-	 unaligned loads just fine if misaligned_operand
-	 is true, and without the UNSPEC it can be combined
-	 with arithmetic instructions.  */
-      else if (misaligned_operand (op1, GET_MODE (op1)))
-	emit_insn (gen_rtx_SET (op0, op1));
       else
-	emit_insn (load_unaligned (op0, op1));
+	emit_insn (gen_rtx_SET (op0, op1));
     }
   else if (MEM_P (op0))
     {
@@ -18898,7 +18864,7 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
 	  emit_insn (extract (m, op1, const1_rtx));
 	}
       else
-	emit_insn (store_unaligned (op0, op1));
+	emit_insn (gen_rtx_SET (op0, op1));
     }
   else
     gcc_unreachable ();
@@ -18960,8 +18926,6 @@ void
 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
 {
   rtx op0, op1, orig_op0 = NULL_RTX, m;
-  rtx (*load_unaligned) (rtx, rtx);
-  rtx (*store_unaligned) (rtx, rtx);
 
   op0 = operands[0];
   op1 = operands[1];
@@ -18986,30 +18950,8 @@ ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
 	  /* FALLTHRU */
 
 	case MODE_VECTOR_FLOAT:
-	  switch (GET_MODE (op0))
-	    {
-	    default:
-	      gcc_unreachable ();
-	    case V16SImode:
-	      load_unaligned = gen_avx512f_loaddquv16si;
-	      store_unaligned = gen_avx512f_storedquv16si;
-	      break;
-	    case V16SFmode:
-	      load_unaligned = gen_avx512f_loadups512;
-	      store_unaligned = gen_avx512f_storeups512;
-	      break;
-	    case V8DFmode:
-	      load_unaligned = gen_avx512f_loadupd512;
-	      store_unaligned = gen_avx512f_storeupd512;
-	      break;
-	    }
 
-	  if (MEM_P (op1))
-	    emit_insn (load_unaligned (op0, op1));
-	  else if (MEM_P (op0))
-	    emit_insn (store_unaligned (op0, op1));
-	  else
-	    gcc_unreachable ();
+	  emit_insn (gen_rtx_SET (op0, op1));
 	  if (orig_op0)
 	    emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
 	  break;
@@ -19077,7 +19019,7 @@ ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
 	    }
 	  op1 = gen_lowpart (V16QImode, op1);
 	  /* We will eventually emit movups based on insn attributes.  */
-	  emit_insn (gen_sse2_loaddquv16qi (op0, op1));
+	  emit_insn (gen_rtx_SET (op0, op1));
 	  if (orig_op0)
 	    emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
 	}
@@ -19091,7 +19033,7 @@ ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
 	      || optimize_insn_for_size_p ())
 	    {
 	      /* We will eventually emit movups based on insn attributes.  */
-	      emit_insn (gen_sse2_loadupd (op0, op1));
+	      emit_insn (gen_rtx_SET (op0, op1));
 	      return;
 	    }
 
@@ -19135,7 +19077,7 @@ ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
 		  op0 = gen_reg_rtx (V4SFmode);
 		}
 	      op1 = gen_lowpart (V4SFmode, op1);
-	      emit_insn (gen_sse_loadups (op0, op1));
+	      emit_insn (gen_rtx_SET (op0, op1));
 	      if (orig_op0)
 		emit_move_insn (orig_op0,
 				gen_lowpart (GET_MODE (orig_op0), op0));
@@ -19167,7 +19109,7 @@ ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
 	  op0 = gen_lowpart (V16QImode, op0);
 	  op1 = gen_lowpart (V16QImode, op1);
 	  /* We will eventually emit movups based on insn attributes.  */
-	  emit_insn (gen_sse2_storedquv16qi (op0, op1));
+	  emit_insn (gen_rtx_SET (op0, op1));
 	}
       else if (TARGET_SSE2 && mode == V2DFmode)
 	{
@@ -19176,7 +19118,7 @@ ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
 	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
 	      || optimize_insn_for_size_p ())
 	    /* We will eventually emit movups based on insn attributes.  */
-	    emit_insn (gen_sse2_storeupd (op0, op1));
+	    emit_insn (gen_rtx_SET (op0, op1));
 	  else
 	    {
 	      m = adjust_address (op0, DFmode, 0);
@@ -19196,7 +19138,7 @@ ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
 	      || optimize_insn_for_size_p ())
 	    {
 	      op0 = gen_lowpart (V4SFmode, op0);
-	      emit_insn (gen_sse_storeups (op0, op1));
+	      emit_insn (gen_rtx_SET (op0, op1));
 	    }
 	  else
 	    {
@@ -32655,9 +32597,9 @@ static const struct builtin_description bdesc_special_args[] =
   { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
 
   /* SSE */
-  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
+  { OPTION_MASK_ISA_SSE, CODE_FOR_movv4sf_internal, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
-  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
+  { OPTION_MASK_ISA_SSE, CODE_FOR_movv4sf_internal, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
 
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
@@ -32671,14 +32613,14 @@ static const struct builtin_description bdesc_special_args[] =
   /* SSE2 */
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_movv2df_internal, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_movv16qi_internal, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
   { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_movv2df_internal, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_movv16qi_internal, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
 
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
@@ -32703,12 +32645,12 @@ static const struct builtin_description bdesc_special_args[] =
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
 
-  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
-  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
-  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
-  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
-  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
-  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_movv4df_internal, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_movv8sf_internal, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_movv4df_internal, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_movv8sf_internal, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_movv32qi_internal, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_movv32qi_internal, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
 
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
@@ -32748,10 +32690,10 @@ static const struct builtin_description bdesc_special_args[] =
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
-  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
-  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
-  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
-  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
+  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCINT_V16SI_UHI },
+  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCINT64_V8DI_UQI },
+  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCDOUBLE_V8DF_UQI },
+  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCFLOAT_V16SF_UHI },
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
@@ -32760,9 +32702,9 @@ static const struct builtin_description bdesc_special_args[] =
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
-  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI },
-  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI },
-  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI },
+  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PINT_V16SI_UHI },
+  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PINT64_V8DI_UQI },
+  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF_UQI },
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI },
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI },
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI },
@@ -32778,7 +32720,7 @@ static const struct builtin_description bdesc_special_args[] =
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI },
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI },
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI },
-  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI },
+  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF_UHI },
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI },
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI },
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI },
@@ -32807,16 +32749,16 @@ static const struct builtin_description bdesc_special_args[] =
   { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
 
   /* AVX512BW */
-  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI },
-  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI },
-  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_USI },
-  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_UDI },
+  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loadv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCSHORT_V32HI_USI },
+  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loadv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCCHAR_V64QI_UDI },
+  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storev32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PSHORT_V32HI_USI },
+  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storev64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PCHAR_V64QI_UDI },
 
   /* AVX512VL */
-  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_UHI },
-  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_UQI },
-  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_USI },
-  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_UHI },
+  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCSHORT_V16HI_UHI },
+  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCSHORT_V8HI_UQI },
+  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCCHAR_V32QI_USI },
+  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCCHAR_V16QI_UHI },
   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
@@ -32833,26 +32775,26 @@ static const struct builtin_description bdesc_special_args[] =
   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI },
   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI },
   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI },
-  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
-  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
-  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
-  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
-  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI },
-  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI },
-  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI },
-  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI },
-  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
-  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
-  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
-  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
-  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_UQI },
-  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_UQI },
-  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_UQI },
-  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_UQI },
-  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_UHI },
-  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_UQI },
-  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_USI },
-  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_UHI },
+  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE_V4DF_UQI },
+  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE_V2DF_UQI },
+  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT_V8SF_UQI },
+  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT_V4SF_UQI },
+  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF_UQI },
+  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF_UQI },
+  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF_UQI },
+  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF_UQI },
+  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCINT64_V4DI_UQI },
+  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCINT64_V2DI_UQI },
+  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCINT_V8SI_UQI },
+  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCINT_V4SI_UQI },
+  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PINT64_V4DI_UQI },
+  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PINT64_V2DI_UQI },
+  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PINT_V8SI_UQI },
+  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PINT_V4SI_UQI },
+  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PSHORT_V16HI_UHI },
+  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PSHORT_V8HI_UQI },
+  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI_USI },
+  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI_UHI },
   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI },
   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI },
   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI },
@@ -33984,10 +33926,10 @@ static const struct builtin_description bdesc_args[] =
   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
-  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI },
-  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
-  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI },
-  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
+  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI },
+  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
+  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI },
+  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
@@ -34729,8 +34671,8 @@ static const struct builtin_description bdesc_args[] =
   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask",  IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI },
   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT },
-  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI },
-  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI },
+  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loadv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI },
+  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loadv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI },
   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI },
   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_UDI },
@@ -39895,12 +39837,24 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
     case VOID_FTYPE_PV16QI_V2DI_UQI:
     case VOID_FTYPE_PV16QI_V8SI_UQI:
     case VOID_FTYPE_PV16QI_V4SI_UQI:
-    case VOID_FTYPE_PV8HI_V8HI_UQI:
-    case VOID_FTYPE_PV16HI_V16HI_UHI:
-    case VOID_FTYPE_PV32HI_V32HI_USI:
-    case VOID_FTYPE_PV16QI_V16QI_UHI:
-    case VOID_FTYPE_PV32QI_V32QI_USI:
-    case VOID_FTYPE_PV64QI_V64QI_UDI:
+    case VOID_FTYPE_PCHAR_V64QI_UDI:
+    case VOID_FTYPE_PCHAR_V32QI_USI:
+    case VOID_FTYPE_PCHAR_V16QI_UHI:
+    case VOID_FTYPE_PSHORT_V32HI_USI:
+    case VOID_FTYPE_PSHORT_V16HI_UHI:
+    case VOID_FTYPE_PSHORT_V8HI_UQI:
+    case VOID_FTYPE_PINT_V16SI_UHI:
+    case VOID_FTYPE_PINT_V8SI_UQI:
+    case VOID_FTYPE_PINT_V4SI_UQI:
+    case VOID_FTYPE_PINT64_V8DI_UQI:
+    case VOID_FTYPE_PINT64_V4DI_UQI:
+    case VOID_FTYPE_PINT64_V2DI_UQI:
+    case VOID_FTYPE_PDOUBLE_V8DF_UQI:
+    case VOID_FTYPE_PDOUBLE_V4DF_UQI:
+    case VOID_FTYPE_PDOUBLE_V2DF_UQI:
+    case VOID_FTYPE_PFLOAT_V16SF_UHI:
+    case VOID_FTYPE_PFLOAT_V8SF_UQI:
+    case VOID_FTYPE_PFLOAT_V4SF_UQI:
       nargs = 2;
       klass = store;
       /* Reserve memory operand for target.  */
@@ -39918,15 +39872,6 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
     case V2DI_FTYPE_PCV2DI_V2DI_UQI:
     case V4DI_FTYPE_PCV4DI_V4DI_UQI:
     case V8DI_FTYPE_PCV8DI_V8DI_UQI:
-    case V8HI_FTYPE_PCV8HI_V8HI_UQI:
-    case V16HI_FTYPE_PCV16HI_V16HI_UHI:
-    case V32HI_FTYPE_PCV32HI_V32HI_USI:
-    case V16QI_FTYPE_PCV16QI_V16QI_UHI:
-    case V32QI_FTYPE_PCV32QI_V32QI_USI:
-    case V64QI_FTYPE_PCV64QI_V64QI_UDI:
-      nargs = 3;
-      klass = load;
-      memory = 0;
       switch (icode)
 	{
 	/* These builtins and instructions require the memory
@@ -39954,6 +39899,27 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
 	default:
 	  break;
 	}
+    case V64QI_FTYPE_PCCHAR_V64QI_UDI:
+    case V32QI_FTYPE_PCCHAR_V32QI_USI:
+    case V16QI_FTYPE_PCCHAR_V16QI_UHI:
+    case V32HI_FTYPE_PCSHORT_V32HI_USI:
+    case V16HI_FTYPE_PCSHORT_V16HI_UHI:
+    case V8HI_FTYPE_PCSHORT_V8HI_UQI:
+    case V16SI_FTYPE_PCINT_V16SI_UHI:
+    case V8SI_FTYPE_PCINT_V8SI_UQI:
+    case V4SI_FTYPE_PCINT_V4SI_UQI:
+    case V8DI_FTYPE_PCINT64_V8DI_UQI:
+    case V4DI_FTYPE_PCINT64_V4DI_UQI:
+    case V2DI_FTYPE_PCINT64_V2DI_UQI:
+    case V8DF_FTYPE_PCDOUBLE_V8DF_UQI:
+    case V4DF_FTYPE_PCDOUBLE_V4DF_UQI:
+    case V2DF_FTYPE_PCDOUBLE_V2DF_UQI:
+    case V16SF_FTYPE_PCFLOAT_V16SF_UHI:
+    case V8SF_FTYPE_PCFLOAT_V8SF_UQI:
+    case V4SF_FTYPE_PCFLOAT_V4SF_UQI:
+      nargs = 3;
+      klass = load;
+      memory = 0;
       break;
     case VOID_FTYPE_UINT_UINT_UINT:
     case VOID_FTYPE_UINT64_UINT_UINT:
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 1ffb3b9..dead70b 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -20,8 +20,6 @@
 (define_c_enum "unspec" [
   ;; SSE
   UNSPEC_MOVNT
-  UNSPEC_LOADU
-  UNSPEC_STOREU
 
   ;; SSE3
   UNSPEC_LDDQU
@@ -290,14 +288,6 @@
 (define_mode_iterator VI1
   [(V32QI "TARGET_AVX") V16QI])
 
-(define_mode_iterator VI_ULOADSTORE_BW_AVX512VL
-  [V64QI
-   V32HI (V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL")])
-
-(define_mode_iterator VI_ULOADSTORE_F_AVX512VL
-  [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
-   V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
-
 ;; All DImode vector integer modes
 (define_mode_iterator V_AVX
   [V16QI V8HI V4SI V2DI V4SF V2DF
@@ -730,7 +720,8 @@
    (V4SF "3") (V2DF "1")])
 
 (define_mode_attr ssescalarsize
-  [(V8DI  "64") (V4DI  "64") (V2DI  "64")
+  [(V4TI  "64") (V2TI  "64") (V1TI  "64")
+   (V8DI  "64") (V4DI  "64") (V2DI  "64")
    (V64QI "8") (V32QI "8") (V16QI "8")
    (V32HI "16") (V16HI "16") (V8HI "16")
    (V16SI "32") (V8SI "32") (V4SI "32")
@@ -841,7 +832,7 @@
   DONE;
 })
 
-(define_insn "*mov<mode>_internal"
+(define_insn "mov<mode>_internal"
   [(set (match_operand:VMOVE 0 "nonimmediate_operand"               "=v,v ,m")
 	(match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand"  "BC,vm,v"))]
   "TARGET_SSE
@@ -902,9 +893,8 @@
 	case MODE_V16SF:
 	case MODE_V8SF:
 	case MODE_V4SF:
-	  if ((TARGET_AVX || TARGET_IAMCU)
-	      && (misaligned_operand (operands[0], <MODE>mode)
-		  || misaligned_operand (operands[1], <MODE>mode)))
+	  if (misaligned_operand (operands[0], <MODE>mode)
+	      || misaligned_operand (operands[1], <MODE>mode))
 	    return "%vmovups\t{%1, %0|%0, %1}";
 	  else
 	    return "%vmovaps\t{%1, %0|%0, %1}";
@@ -912,19 +902,17 @@
 	case MODE_V8DF:
 	case MODE_V4DF:
 	case MODE_V2DF:
-	  if ((TARGET_AVX || TARGET_IAMCU)
-	      && (misaligned_operand (operands[0], <MODE>mode)
-		  || misaligned_operand (operands[1], <MODE>mode)))
+	  if (misaligned_operand (operands[0], <MODE>mode)
+	      || misaligned_operand (operands[1], <MODE>mode))
 	    return "%vmovupd\t{%1, %0|%0, %1}";
 	  else
 	    return "%vmovapd\t{%1, %0|%0, %1}";
 
 	case MODE_OI:
 	case MODE_TI:
-	  if ((TARGET_AVX || TARGET_IAMCU)
-	      && (misaligned_operand (operands[0], <MODE>mode)
-		  || misaligned_operand (operands[1], <MODE>mode)))
-	    return TARGET_AVX512VL ? "vmovdqu64\t{%1, %0|%0, %1}"
+	  if (misaligned_operand (operands[0], <MODE>mode)
+	      || misaligned_operand (operands[1], <MODE>mode))
+	    return TARGET_AVX512VL ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
 				   : "%vmovdqu\t{%1, %0|%0, %1}";
 	  else
 	    return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
@@ -932,7 +920,11 @@
 	case MODE_XI:
 	  if (misaligned_operand (operands[0], <MODE>mode)
 	      || misaligned_operand (operands[1], <MODE>mode))
-	    return "vmovdqu64\t{%1, %0|%0, %1}";
+	    return (<MODE>mode == V16SImode
+		    || <MODE>mode == V8DImode
+		    || TARGET_AVX512BW)
+		   ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
+		   : "vmovdqu64\t{%1, %0|%0, %1}";
 	  else
 	    return "vmovdqa64\t{%1, %0|%0, %1}";
 
@@ -1154,62 +1146,6 @@
   DONE;
 })
 
-(define_expand "<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
-  [(set (match_operand:VF 0 "register_operand")
-	(unspec:VF [(match_operand:VF 1 "nonimmediate_operand")]
-	  UNSPEC_LOADU))]
-  "TARGET_SSE && <mask_mode512bit_condition>"
-{
-  /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
-     just fine if misaligned_operand is true, and without the UNSPEC it can
-     be combined with arithmetic instructions.  If misaligned_operand is
-     false, still emit UNSPEC_LOADU insn to honor user's request for
-     misaligned load.  */
-  if (TARGET_AVX
-      && misaligned_operand (operands[1], <MODE>mode))
-    {
-      rtx src = operands[1];
-      if (<mask_applied>)
-	src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
-				 operands[2 * <mask_applied>],
-				 operands[3 * <mask_applied>]);
-      emit_insn (gen_rtx_SET (operands[0], src));
-      DONE;
-    }
-})
-
-(define_insn "*<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
-  [(set (match_operand:VF 0 "register_operand" "=v")
-	(unspec:VF
-	  [(match_operand:VF 1 "nonimmediate_operand" "vm")]
-	  UNSPEC_LOADU))]
-  "TARGET_SSE && <mask_mode512bit_condition>"
-{
-  switch (get_attr_mode (insn))
-    {
-    case MODE_V16SF:
-    case MODE_V8SF:
-    case MODE_V4SF:
-      return "%vmovups\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
-    default:
-      return "%vmovu<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
-    }
-}
-  [(set_attr "type" "ssemov")
-   (set_attr "movu" "1")
-   (set_attr "ssememalign" "8")
-   (set_attr "prefix" "maybe_vex")
-   (set (attr "mode")
-	(cond [(and (match_test "<MODE_SIZE> == 16")
-		    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
-		 (const_string "<ssePSmode>")
-	       (match_test "TARGET_AVX")
-		 (const_string "<MODE>")
-	       (match_test "optimize_function_for_size_p (cfun)")
-		 (const_string "V4SF")
-	      ]
-	      (const_string "<MODE>")))])
-
 ;; Merge movsd/movhpd to movupd for TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets.
 (define_peephole2
   [(set (match_operand:V2DF 0 "register_operand")
@@ -1221,69 +1157,9 @@
 			 (match_operand:DF 3 "memory_operand")))]
   "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
    && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
-  [(set (match_dup 2)
-	(unspec:V2DF [(match_dup 4)] UNSPEC_LOADU))]
+  [(set (match_dup 2) (match_dup 4))]
   "operands[4] = adjust_address (operands[1], V2DFmode, 0);")
 
-(define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
-  [(set (match_operand:VF 0 "memory_operand" "=m")
-	(unspec:VF
-	  [(match_operand:VF 1 "register_operand" "v")]
-	  UNSPEC_STOREU))]
-  "TARGET_SSE"
-{
-  switch (get_attr_mode (insn))
-    {
-    case MODE_V16SF:
-    case MODE_V8SF:
-    case MODE_V4SF:
-      return "%vmovups\t{%1, %0|%0, %1}";
-    default:
-      return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
-    }
-}
-  [(set_attr "type" "ssemov")
-   (set_attr "movu" "1")
-   (set_attr "ssememalign" "8")
-   (set_attr "prefix" "maybe_vex")
-   (set (attr "mode")
-	(cond [(and (match_test "<MODE_SIZE> == 16")
-                    (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
-                         (match_test "TARGET_SSE_TYPELESS_STORES")))
-		 (const_string "<ssePSmode>")
-	       (match_test "TARGET_AVX")
-		 (const_string "<MODE>")
-	       (match_test "optimize_function_for_size_p (cfun)")
-		 (const_string "V4SF")
-	      ]
-	      (const_string "<MODE>")))])
-
-(define_insn "<avx512>_storeu<ssemodesuffix><avxsizesuffix>_mask"
-  [(set (match_operand:VF_AVX512VL 0 "memory_operand" "=m")
-	(vec_merge:VF_AVX512VL
-	  (unspec:VF_AVX512VL
-	    [(match_operand:VF_AVX512VL 1 "register_operand" "v")]
-	    UNSPEC_STOREU)
-	  (match_dup 0)
-	  (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
-  "TARGET_AVX512F"
-{
-  switch (get_attr_mode (insn))
-    {
-    case MODE_V16SF:
-    case MODE_V8SF:
-    case MODE_V4SF:
-      return "vmovups\t{%1, %0%{%2%}|%0%{%2%}, %1}";
-    default:
-      return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
-    }
-}
-  [(set_attr "type" "ssemov")
-   (set_attr "movu" "1")
-   (set_attr "memory" "store")
-   (set_attr "prefix" "evex")
-   (set_attr "mode" "<sseinsnmode>")])
-
 ;; Merge movlpd/movhpd to movupd for TARGET_SSE_UNALIGNED_STORE_OPTIMAL targets.
 (define_peephole2
   [(set (match_operand:DF 0 "memory_operand")
@@ -1294,238 +1170,9 @@
 		       (parallel [(const_int 1)])))]
   "TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL
    && ix86_operands_ok_for_move_multiple (operands, false, DFmode)"
-  [(set (match_dup 4)
-	(unspec:V2DF [(match_dup 1)] UNSPEC_STOREU))]
+  [(set (match_dup 4) (match_dup 1))]
   "operands[4] = adjust_address (operands[0], V2DFmode, 0);")
 
-/* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
-   just fine if misaligned_operand is true, and without the UNSPEC it can
-   be combined with arithmetic instructions.  If misaligned_operand is
-   false, still emit UNSPEC_LOADU insn to honor user's request for
-   misaligned load.  */
-(define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
-  [(set (match_operand:VI1 0 "register_operand")
-	(unspec:VI1
-	  [(match_operand:VI1 1 "nonimmediate_operand")]
-	  UNSPEC_LOADU))]
-  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
-{
-  if (TARGET_AVX
-      && misaligned_operand (operands[1], <MODE>mode))
-    {
-      rtx src = operands[1];
-      if (<mask_applied>)
-	src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
-				 operands[2 * <mask_applied>],
-				 operands[3 * <mask_applied>]);
-      emit_insn (gen_rtx_SET (operands[0], src));
-      DONE;
-    }
-})
-
-(define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
-  [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "register_operand")
-	(unspec:VI_ULOADSTORE_BW_AVX512VL
-	  [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "nonimmediate_operand")]
-	  UNSPEC_LOADU))]
-  "TARGET_AVX512BW"
-{
-  if (misaligned_operand (operands[1], <MODE>mode))
-    {
-      rtx src = operands[1];
-      if (<mask_applied>)
-	src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
-				 operands[2 * <mask_applied>],
-				 operands[3 * <mask_applied>]);
-      emit_insn (gen_rtx_SET (operands[0], src));
-      DONE;
-    }
-})
-
-(define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
-  [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "register_operand")
-	(unspec:VI_ULOADSTORE_F_AVX512VL
-	  [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "nonimmediate_operand")]
-	  UNSPEC_LOADU))]
-  "TARGET_AVX512F"
-{
-  if (misaligned_operand (operands[1], <MODE>mode))
-    {
-      rtx src = operands[1];
-      if (<mask_applied>)
-	src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
-				 operands[2 * <mask_applied>],
-				 operands[3 * <mask_applied>]);
-      emit_insn (gen_rtx_SET (operands[0], src));
-      DONE;
-    }
-})
-
-(define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
-  [(set (match_operand:VI1 0 "register_operand" "=v")
-	(unspec:VI1
-	  [(match_operand:VI1 1 "nonimmediate_operand" "vm")]
-	  UNSPEC_LOADU))]
-  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
-{
-  switch (get_attr_mode (insn))
-    {
-    case MODE_V8SF:
-    case MODE_V4SF:
-      return "%vmovups\t{%1, %0|%0, %1}";
-    default:
-      if (!(TARGET_AVX512VL && TARGET_AVX512BW))
-	return "%vmovdqu\t{%1, %0|%0, %1}";
-      else
-	return "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
-    }
-}
-  [(set_attr "type" "ssemov")
-   (set_attr "movu" "1")
-   (set_attr "ssememalign" "8")
-   (set (attr "prefix_data16")
-     (if_then_else
-       (match_test "TARGET_AVX")
-     (const_string "*")
-     (const_string "1")))
-   (set_attr "prefix" "maybe_vex")
-   (set (attr "mode")
-	(cond [(and (match_test "<MODE_SIZE> == 16")
-		    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
-		 (const_string "<ssePSmode>")
-	       (match_test "TARGET_AVX")
-		 (const_string "<sseinsnmode>")
-	       (match_test "optimize_function_for_size_p (cfun)")
-	         (const_string "V4SF")
-	      ]
-	      (const_string "<sseinsnmode>")))])
-
-(define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
-  [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "register_operand" "=v")
-	(unspec:VI_ULOADSTORE_BW_AVX512VL
-	  [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "nonimmediate_operand" "vm")]
-	  UNSPEC_LOADU))]
-  "TARGET_AVX512BW"
-  "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
-  [(set_attr "type" "ssemov")
-   (set_attr "movu" "1")
-   (set_attr "ssememalign" "8")
-   (set_attr "prefix" "maybe_evex")])
-
-(define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
-  [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "register_operand" "=v")
-	(unspec:VI_ULOADSTORE_F_AVX512VL
-	  [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "nonimmediate_operand" "vm")]
-	  UNSPEC_LOADU))]
-  "TARGET_AVX512F"
-  "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
-  [(set_attr "type" "ssemov")
-   (set_attr "movu" "1")
-   (set_attr "ssememalign" "8")
-   (set_attr "prefix" "maybe_evex")])
-
-(define_insn "<sse2_avx_avx512f>_storedqu<mode>"
-  [(set (match_operand:VI1 0 "memory_operand" "=m")
-	(unspec:VI1
-	  [(match_operand:VI1 1 "register_operand" "v")]
-	  UNSPEC_STOREU))]
-  "TARGET_SSE2"
-{
-  switch (get_attr_mode (insn))
-    {
-    case MODE_V16SF:
-    case MODE_V8SF:
-    case MODE_V4SF:
-      return "%vmovups\t{%1, %0|%0, %1}";
-    default:
-      switch (<MODE>mode)
-      {
-      case V32QImode:
-      case V16QImode:
-	if (!(TARGET_AVX512VL && TARGET_AVX512BW))
-	  return "%vmovdqu\t{%1, %0|%0, %1}";
-      default:
-	  return "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}";
-      }
-    }
-}
-  [(set_attr "type" "ssemov")
-   (set_attr "movu" "1")
-   (set_attr "ssememalign" "8")
-   (set (attr "prefix_data16")
-     (if_then_else
-       (match_test "TARGET_AVX")
-     (const_string "*")
-     (const_string "1")))
-   (set_attr "prefix" "maybe_vex")
-   (set (attr "mode")
-	(cond [(and (match_test "<MODE_SIZE> == 16")
-		    (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
-			 (match_test "TARGET_SSE_TYPELESS_STORES")))
-		 (const_string "<ssePSmode>")
-	       (match_test "TARGET_AVX")
-		 (const_string "<sseinsnmode>")
-	       (match_test "optimize_function_for_size_p (cfun)")
-	         (const_string "V4SF")
-	      ]
-	      (const_string "<sseinsnmode>")))])
-
-(define_insn "<sse2_avx_avx512f>_storedqu<mode>"
-  [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "memory_operand" "=m")
-	(unspec:VI_ULOADSTORE_BW_AVX512VL
-	  [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "register_operand" "v")]
-	  UNSPEC_STOREU))]
-  "TARGET_AVX512BW"
-  "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssemov")
-   (set_attr "movu" "1")
-   (set_attr "ssememalign" "8")
-   (set_attr "prefix" "maybe_evex")])
-
-(define_insn "<sse2_avx_avx512f>_storedqu<mode>"
-  [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "memory_operand" "=m")
-	(unspec:VI_ULOADSTORE_F_AVX512VL
-	  [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "register_operand" "v")]
-	  UNSPEC_STOREU))]
-  "TARGET_AVX512F"
-  "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssemov")
-   (set_attr "movu" "1")
-   (set_attr "ssememalign" "8")
-   (set_attr "prefix" "maybe_vex")])
-
-(define_insn "<avx512>_storedqu<mode>_mask"
-  [(set (match_operand:VI48_AVX512VL 0 "memory_operand" "=m")
-	(vec_merge:VI48_AVX512VL
-	  (unspec:VI48_AVX512VL
-	    [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
-	    UNSPEC_STOREU)
-	  (match_dup 0)
-	  (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
-  "TARGET_AVX512F"
-  "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
-  [(set_attr "type" "ssemov")
-   (set_attr "movu" "1")
-   (set_attr "memory" "store")
-   (set_attr "prefix" "evex")
-   (set_attr "mode" "<sseinsnmode>")])
-
-(define_insn "<avx512>_storedqu<mode>_mask"
-  [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
-	(vec_merge:VI12_AVX512VL
-	  (unspec:VI12_AVX512VL
-	    [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
-	    UNSPEC_STOREU)
-	  (match_dup 0)
-	  (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
-  "TARGET_AVX512BW"
-  "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
-  [(set_attr "type" "ssemov")
-   (set_attr "movu" "1")
-   (set_attr "memory" "store")
-   (set_attr "prefix" "evex")
-   (set_attr "mode" "<sseinsnmode>")])
-
 (define_insn "<sse3>_lddqu<avxsizesuffix>"
   [(set (match_operand:VI1 0 "register_operand" "=x")
 	(unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
@@ -15465,69 +15112,6 @@
    (set_attr "memory" "none,load")
    (set_attr "mode" "TI")])
 
-(define_insn_and_split "*sse4_2_pcmpestr_unaligned"
-  [(set (match_operand:SI 0 "register_operand" "=c")
-	(unspec:SI
-	  [(match_operand:V16QI 2 "register_operand" "x")
-	   (match_operand:SI 3 "register_operand" "a")
-	   (unspec:V16QI
-	     [(match_operand:V16QI 4 "memory_operand" "m")]
-	     UNSPEC_LOADU)
-	   (match_operand:SI 5 "register_operand" "d")
-	   (match_operand:SI 6 "const_0_to_255_operand" "n")]
-	  UNSPEC_PCMPESTR))
-   (set (match_operand:V16QI 1 "register_operand" "=Yz")
-	(unspec:V16QI
-	  [(match_dup 2)
-	   (match_dup 3)
-	   (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
-	   (match_dup 5)
-	   (match_dup 6)]
-	  UNSPEC_PCMPESTR))
-   (set (reg:CC FLAGS_REG)
-	(unspec:CC
-	  [(match_dup 2)
-	   (match_dup 3)
-	   (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
-	   (match_dup 5)
-	   (match_dup 6)]
-	  UNSPEC_PCMPESTR))]
-  "TARGET_SSE4_2
-   && can_create_pseudo_p ()"
-  "#"
-  "&& 1"
-  [(const_int 0)]
-{
-  int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
-  int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
-  int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
-
-  if (ecx)
-    emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
-				     operands[3], operands[4],
-				     operands[5], operands[6]));
-  if (xmm0)
-    emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
-				     operands[3], operands[4],
-				     operands[5], operands[6]));
-  if (flags && !(ecx || xmm0))
-    emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
-					   operands[2], operands[3],
-					   operands[4], operands[5],
-					   operands[6]));
-  if (!(flags || ecx || xmm0))
-    emit_note (NOTE_INSN_DELETED);
-
-  DONE;
-}
-  [(set_attr "type" "sselog")
-   (set_attr "prefix_data16" "1")
-   (set_attr "prefix_extra" "1")
-   (set_attr "ssememalign" "8")
-   (set_attr "length_immediate" "1")
-   (set_attr "memory" "load")
-   (set_attr "mode" "TI")])
-
 (define_insn "sse4_2_pcmpestri"
   [(set (match_operand:SI 0 "register_operand" "=c,c")
 	(unspec:SI
@@ -15665,60 +15249,6 @@
    (set_attr "memory" "none,load")
    (set_attr "mode" "TI")])
 
-(define_insn_and_split "*sse4_2_pcmpistr_unaligned"
-  [(set (match_operand:SI 0 "register_operand" "=c")
-	(unspec:SI
-	  [(match_operand:V16QI 2 "register_operand" "x")
-	   (unspec:V16QI
-	     [(match_operand:V16QI 3 "memory_operand" "m")]
-	     UNSPEC_LOADU)
-	   (match_operand:SI 4 "const_0_to_255_operand" "n")]
-	  UNSPEC_PCMPISTR))
-   (set (match_operand:V16QI 1 "register_operand" "=Yz")
-	(unspec:V16QI
-	  [(match_dup 2)
-	   (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
-	   (match_dup 4)]
-	  UNSPEC_PCMPISTR))
-   (set (reg:CC FLAGS_REG)
-	(unspec:CC
-	  [(match_dup 2)
-	   (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
-	   (match_dup 4)]
-	  UNSPEC_PCMPISTR))]
-  "TARGET_SSE4_2
-   && can_create_pseudo_p ()"
-  "#"
-  "&& 1"
-  [(const_int 0)]
-{
-  int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
-  int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
-  int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
-
-  if (ecx)
-    emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
-				     operands[3], operands[4]));
-  if (xmm0)
-    emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
-				     operands[3], operands[4]));
-  if (flags && !(ecx || xmm0))
-    emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
-					   operands[2], operands[3],
-					   operands[4]));
-  if (!(flags || ecx || xmm0))
-    emit_note (NOTE_INSN_DELETED);
-
-  DONE;
-}
-  [(set_attr "type" "sselog")
-   (set_attr "prefix_data16" "1")
-   (set_attr "prefix_extra" "1")
-   (set_attr "ssememalign" "8")
-   (set_attr "length_immediate" "1")
-   (set_attr "memory" "load")
-   (set_attr "mode" "TI")])
-
 (define_insn "sse4_2_pcmpistri"
   [(set (match_operand:SI 0 "register_operand" "=c,c")
 	(unspec:SI
diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-1.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-1.c
index 5e8c30d..d82aecf 100644
--- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-1.c
@@ -3,7 +3,7 @@
 
 #define N 1024
 
-float a[N], b[N+3], c[N], d[N];
+extern float a[N], b[N+3], c[N], d[N];
 
 void
 avx_test (void)
@@ -17,6 +17,6 @@ avx_test (void)
     d[i] = c[i] * 20.0;
 }
 
-/* { dg-final { scan-assembler-not "avx_storeups256" } } */
-/* { dg-final { scan-assembler "vmovups.*\\*movv4sf_internal/3" } } */
+/* { dg-final { scan-assembler-not "vmovups.*movv8sf_internal/3" } } */
+/* { dg-final { scan-assembler "vmovups.*movv4sf_internal/3" } } */
 /* { dg-final { scan-assembler "vextractf128" } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c
index eeabfe9..817be17 100644
--- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c
@@ -23,6 +23,6 @@ avx_test (void)
     }
 }
 
-/* { dg-final { scan-assembler-not "avx_storedqu256" } } */
-/* { dg-final { scan-assembler "vmovups.*\\*movv16qi_internal/3" } } */
+/* { dg-final { scan-assembler-not "vmovups.*movv32qi_internal/3" } } */
+/* { dg-final { scan-assembler "vmovups.*movv16qi_internal/3" } } */
 /* { dg-final { scan-assembler "vextract.128" } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c
index 6175d52..a439a66 100644
--- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c
+++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c
@@ -3,7 +3,7 @@
 
 #define N 1024
 
-double a[N], b[N+3], c[N], d[N];
+extern double a[N], b[N+3], c[N], d[N];
 
 void
 avx_test (void)
@@ -17,6 +17,6 @@ avx_test (void)
     d[i] = c[i] * 20.0;
 }
 
-/* { dg-final { scan-assembler-not "avx_storeupd256" } } */
-/* { dg-final { scan-assembler "vmovups.*\\*movv2df_internal/3" } } */
+/* { dg-final { scan-assembler-not "vmovups.*movv4df_internal/3" } } */
+/* { dg-final { scan-assembler "vmovups.*movv2df_internal/3" } } */
 /* { dg-final { scan-assembler "vextractf128" } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-4.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-4.c
index 68ff923..463c1d8 100644
--- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-4.c
+++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-4.c
@@ -3,7 +3,7 @@
 
 #define N 1024
 
-float a[N], b[N+3], c[N];
+extern float a[N], b[N+3], c[N];
 
 void
 avx_test (void)
@@ -14,7 +14,6 @@ avx_test (void)
     b[i+3] = a[i] * c[i];
 }
 
-/* { dg-final { scan-assembler "avx_storeups256" } } */
-/* { dg-final { scan-assembler-not "sse_storeups" } } */
-/* { dg-final { scan-assembler-not "\\*avx_movv4sf_internal/3" } } */
+/* { dg-final { scan-assembler "vmovups.*movv8sf_internal/3" } } */
+/* { dg-final { scan-assembler-not "movups.*movv4sf_internal/3" } } */
 /* { dg-final { scan-assembler-not "vextractf128" } } */
-- 
2.5.5

