[AVX] Document changes

H.J. Lu hjl.tools@gmail.com
Wed Aug 20 14:54:00 GMT 2008


On Wed, Aug 20, 2008 at 7:10 AM, Ye, Joey <joey.ye@intel.com> wrote:
> This is the document change for AVX new functions. OK for branch?
>
> Thanks - Joey
> ---
> ChangeLog:
> 2008-08-20      Joey Ye <joey.ye@intel.com>
>
>        * doc/extend.texi: Document AVX built-in functions.
>
>        * doc/invoke.texi: Document -mavx.
>
> Index: extend.texi
> ===================================================================
> --- extend.texi (revision 3736)
> +++ extend.texi (working copy)
> @@ -8271,6 +8271,141 @@
>  Generates the @code{popcntq} machine instruction.
>  @end table
>
> +The following built-in functions are available when @option{-mavx} is
> +used. All of them generate the machine instruction that is part of the
> +name.
> +
> +@smallexample
> +void __builtin_ia32_vzeroupper (void)
> +void __builtin_ia32_vzeroall (void)
> +v4sf __builtin_ia32_vbroadcastss (pcfloat)
> +v4df __builtin_ia32_vbroadcastsd256 (pcdouble)
> +v8sf __builtin_ia32_vbroadcastss256 (pcfloat)
> +v4df __builtin_ia32_vbroadcastf128_pd256 (pcv2df)
> +v8sf __builtin_ia32_vbroadcastf128_ps256 (pcv4sf)
> +v4df __builtin_ia32_loadupd256 (pcdouble)
> +v8sf __builtin_ia32_loadups256 (pcfloat)
> +void __builtin_ia32_storeupd256 (pdouble,v4df)
> +void __builtin_ia32_storeups256 (pfloat,v8sf)
> +v32qi __builtin_ia32_loaddqu256 (pcchar)
> +void __builtin_ia32_storedqu256 (pchar,v32qi)
> +v32qi __builtin_ia32_lddqu256 (pcchar)
> +v2df __builtin_ia32_maskloadpd (pcv2df,v2df)
> +v4sf __builtin_ia32_maskloadps (pcv4sf,v4sf)
> +v4df __builtin_ia32_maskloadpd256 (pcv4df,v4df)
> +v8sf __builtin_ia32_maskloadps256 (pcv8sf,v8sf)
> +void __builtin_ia32_maskstorepd (pv2df,v2df,v2df)
> +void __builtin_ia32_maskstoreps (pv4sf,v4sf,v4sf)
> +void __builtin_ia32_maskstorepd256 (pv4df,v4df,v4df)
> +void __builtin_ia32_maskstoreps256 (pv8sf,v8sf,v8sf)
> +v4df __builtin_ia32_addpd256 (v4df,v4df)
> +v8sf __builtin_ia32_addps256 (v8sf,v8sf)
> +v4df __builtin_ia32_addsubpd256 (v4df,v4df)
> +v8sf __builtin_ia32_addsubps256 (v8sf,v8sf)
> +v4df __builtin_ia32_andpd256 (v4df,v4df)
> +v8sf __builtin_ia32_andps256 (v8sf,v8sf)
> +v4df __builtin_ia32_andnpd256 (v4df,v4df)
> +v8sf __builtin_ia32_andnps256 (v8sf,v8sf)
> +v4df __builtin_ia32_divpd256 (v4df,v4df)
> +v8sf __builtin_ia32_divps256 (v8sf,v8sf)
> +v4df __builtin_ia32_haddpd256 (v4df,v4df)
> +v8sf __builtin_ia32_hsubps256 (v8sf,v8sf)
> +v4df __builtin_ia32_hsubpd256 (v4df,v4df)
> +v8sf __builtin_ia32_haddps256 (v8sf,v8sf)
> +v4df __builtin_ia32_maxpd256 (v4df,v4df)
> +v8sf __builtin_ia32_maxps256 (v8sf,v8sf)
> +v4df __builtin_ia32_minpd256 (v4df,v4df)
> +v8sf __builtin_ia32_minps256 (v8sf,v8sf)
> +v4df __builtin_ia32_mulpd256 (v4df,v4df)
> +v8sf __builtin_ia32_mulps256 (v8sf,v8sf)
> +v4df __builtin_ia32_orpd256 (v4df,v4df)
> +v8sf __builtin_ia32_orps256 (v8sf,v8sf)
> +v4df __builtin_ia32_subpd256 (v4df,v4df)
> +v8sf __builtin_ia32_subps256 (v8sf,v8sf)
> +v4df __builtin_ia32_xorpd256 (v4df,v4df)
> +v8sf __builtin_ia32_xorps256 (v8sf,v8sf)
> +v2df __builtin_ia32_vpermilvarpd (v2df,v2di)
> +v4sf __builtin_ia32_vpermilvarps (v4sf,v4si)
> +v4df __builtin_ia32_vpermilvarpd256 (v4df,v4di)
> +v8sf __builtin_ia32_vpermilvarps256 (v8sf,v8si)
> +v4df __builtin_ia32_blendpd256 (v4df,v4df,int)
> +v8sf __builtin_ia32_blendps256 (v8sf,v8sf,int)
> +v4df __builtin_ia32_blendvpd256 (v4df,v4df,v4df)
> +v8sf __builtin_ia32_blendvps256 (v8sf,v8sf,v8sf)
> +v8sf __builtin_ia32_dpps256 (v8sf,v8sf,int)
> +v4df __builtin_ia32_shufpd256 (v4df,v4df,int)
> +v8sf __builtin_ia32_shufps256 (v8sf,v8sf,int)
> +v2df __builtin_ia32_cmpsd (v2df,v2df,int)
> +v4sf __builtin_ia32_cmpss (v4sf,v4sf,int)
> +v2df __builtin_ia32_cmppd (v2df,v2df,int)
> +v4sf __builtin_ia32_cmpps (v4sf,v4sf,int)
> +v4df __builtin_ia32_cmppd256 (v4df,v4df,int)
> +v8sf __builtin_ia32_cmpps256 (v8sf,v8sf,int)
> +v2df __builtin_ia32_vextractf128_pd256 (v4df,int)
> +v4sf __builtin_ia32_vextractf128_ps256 (v8sf,int)
> +v4si __builtin_ia32_vextractf128_si256 (v8si,int)
> +v4df __builtin_ia32_cvtdq2pd256 (v4si)
> +v8sf __builtin_ia32_cvtdq2ps256 (v8si)
> +v4sf __builtin_ia32_cvtpd2ps256 (v4df)
> +v8si __builtin_ia32_cvtps2dq256 (v8sf)
> +v4df __builtin_ia32_cvtps2pd256 (v4sf)
> +v4si __builtin_ia32_cvttpd2dq256 (v4df)
> +v4si __builtin_ia32_cvtpd2dq256 (v4df)
> +v8si __builtin_ia32_cvttps2dq256 (v8sf)
> +v4df __builtin_ia32_vperm2f128_pd256 (v4df,v4df,int)
> +v8sf __builtin_ia32_vperm2f128_ps256 (v8sf,v8sf,int)
> +v8si __builtin_ia32_vperm2f128_si256 (v8si,v8si,int)
> +v2df __builtin_ia32_vpermilpd (v2df,int)
> +v4sf __builtin_ia32_vpermilps (v4sf,int)
> +v4df __builtin_ia32_vpermilpd256 (v4df,int)
> +v8sf __builtin_ia32_vpermilps256 (v8sf,int)
> +v2df __builtin_ia32_vpermil2pd (v2df,v2df,v2di,int)
> +v4sf __builtin_ia32_vpermil2ps (v4sf,v4sf,v4si,int)
> +v4df __builtin_ia32_vpermil2pd256 (v4df,v4df,v4di,int)
> +v8sf __builtin_ia32_vpermil2ps256 (v8sf,v8sf,v8si,int)
> +v4df __builtin_ia32_vinsertf128_pd256 (v4df,v2df,int)
> +v8sf __builtin_ia32_vinsertf128_ps256 (v8sf,v4sf,int)
> +v8si __builtin_ia32_vinsertf128_si256 (v8si,v4si,int)
> +v8sf __builtin_ia32_movshdup256 (v8sf)
> +v8sf __builtin_ia32_movsldup256 (v8sf)
> +v4df __builtin_ia32_movddup256 (v4df)
> +v4df __builtin_ia32_sqrtpd256 (v4df)
> +v8sf __builtin_ia32_sqrtps256 (v8sf)
> +v8sf __builtin_ia32_sqrtps_nr256 (v8sf)
> +v8sf __builtin_ia32_rsqrtps256 (v8sf)
> +v8sf __builtin_ia32_rsqrtps_nr256 (v8sf)
> +v8sf __builtin_ia32_rcpps256 (v8sf)
> +v4df __builtin_ia32_roundpd256 (v4df,int)
> +v8sf __builtin_ia32_roundps256 (v8sf,int)
> +v4df __builtin_ia32_unpckhpd256 (v4df,v4df)
> +v4df __builtin_ia32_unpcklpd256 (v4df,v4df)
> +v8sf __builtin_ia32_unpckhps256 (v8sf,v8sf)
> +v8sf __builtin_ia32_unpcklps256 (v8sf,v8sf)
> +v8si __builtin_ia32_si256_si (v4si)
> +v8sf __builtin_ia32_ps256_ps (v4sf)
> +v4df __builtin_ia32_pd256_pd (v2df)
> +v4si __builtin_ia32_si_si256 (v8si)
> +v4sf __builtin_ia32_ps_ps256 (v8sf)
> +v2df __builtin_ia32_pd_pd256 (v4df)
> +int __builtin_ia32_vtestzpd (v2df,v2df,ptest)
> +int __builtin_ia32_vtestcpd (v2df,v2df,ptest)
> +int __builtin_ia32_vtestnzcpd (v2df,v2df,ptest)
> +int __builtin_ia32_vtestzps (v4sf,v4sf,ptest)
> +int __builtin_ia32_vtestcps (v4sf,v4sf,ptest)
> +int __builtin_ia32_vtestnzcps (v4sf,v4sf,ptest)
> +int __builtin_ia32_vtestzpd256 (v4df,v4df,ptest)
> +int __builtin_ia32_vtestcpd256 (v4df,v4df,ptest)
> +int __builtin_ia32_vtestnzcpd256 (v4df,v4df,ptest)
> +int __builtin_ia32_vtestzps256 (v8sf,v8sf,ptest)
> +int __builtin_ia32_vtestcps256 (v8sf,v8sf,ptest)
> +int __builtin_ia32_vtestnzcps256 (v8sf,v8sf,ptest)
> +int __builtin_ia32_ptestz256 (v4di,v4di,ptest)
> +int __builtin_ia32_ptestc256 (v4di,v4di,ptest)
> +int __builtin_ia32_ptestnzc256 (v4di,v4di,ptest)
> +int __builtin_ia32_movmskpd256 (v4df)
> +int __builtin_ia32_movmskps256 (v8sf)
> +@end smallexample

Please sort them by name.

>  The following built-in functions are available when @option{-maes} is
>  used.  All of them generate the machine instruction that is part of the
>  name.
> Index: invoke.texi
> ===================================================================
> --- invoke.texi (revision 3736)
> +++ invoke.texi (working copy)
> @@ -558,7 +558,7 @@
>  -mpreferred-stack-boundary=@var{num}
>  -mincoming-stack-boundary=@var{num}
>  -mcld -mcx16 -msahf -mrecip @gol
> --mmmx  -msse  -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 @gol
> +-mmmx  -msse  -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx @gol
>  -maes -mpclmul @gol
>  -msse4a -m3dnow -mpopcnt -mabm -msse5 @gol
>  -mthreads  -mno-align-stringops  -minline-all-stringops @gol
> @@ -10790,6 +10790,8 @@
>  @itemx -mno-sse4.2
>  @itemx -msse4
>  @itemx -mno-sse4
> +@itemx -mavx
> +@itemx -mno-avx
>  @itemx -maes
>  @itemx -mno-aes
>  @itemx -mpclmul
> @@ -10811,7 +10813,7 @@
>  @opindex m3dnow
>  @opindex mno-3dnow
>  These switches enable or disable the use of instructions in the MMX,
> -SSE, SSE2, SSE3, SSSE3, SSE4.1, AES, PCLMUL, SSE4A, SSE5, ABM or
> +SSE, SSE2, SSE3, SSSE3, SSE4.1, AVX, AES, PCLMUL, SSE4A, SSE5, ABM or
>  3DNow!@: extended instruction sets.
>  These extensions are also available as built-in functions: see
>  @ref{X86 Built-in Functions}, for details of the functions enabled and
>

Please mention that -mavx will generate AVX instructions instead of SSE instructions.

Thanks.


-- 
H.J.



More information about the Gcc-patches mailing list