This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
i386 SSE math switch take II
- From: Jan Hubicka <jh at suse dot cz>
- To: gcc-patches at gcc dot gnu dot org, rth at cygnys dot com, patches at x86-64 dot org
- Date: Thu, 13 Dec 2001 10:51:32 +0100
- Subject: i386 SSE math switch take II
Hi,
this patch implements the -mfpmath switch (I found -mmath a bit confusing), as discussed earlier.
The valid choices are "sse", "387", or "sse,387" (the code also accepts "387,sse").
Honza
Thu Dec 13 10:47:25 CET 2001 Jan Hubicka <jh@suse.cz>
* i386.c (ix86_fpmath, ix86_fpmath_string): New.
(override_options): Set ix86_fpmath.
* i386.h (MASK_MIX_SSE_I387): Remove.
(TARGET_SSE_MATH): New.
(TARGET_MIX_SSE_I387): Use ix86_fpmath.
(TARGET_SWITCHES): Remove "mix-sse-i387".
(fpmath_unit): New enum.
(ix86_fpmath, ix86_fpmath_string): Declare.
* i386.md (swapsf): Fix condition.
(add?f, sub?f, mul?f, div?f, sqrt?f, min?f): Use TARGET_SSE_MATH.
(fp_?f_*_nosse): New.
(fp_*): Use TARGET_SSE_MATH.
* invoke.texi (-mfpmath): Document.
(-msse2): Add.
Index: config/i386/i386.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.c,v
retrieving revision 1.344
diff -c -3 -p -r1.344 i386.c
*** i386.c 2001/12/12 18:08:37 1.344
--- i386.c 2001/12/13 09:45:25
*************** enum cmodel ix86_cmodel;
*** 578,589 ****
--- 578,593 ----
/* which cpu are we scheduling for */
enum processor_type ix86_cpu;
+ /* which unit we are generating floating point math for */
+ enum fpmath_unit ix86_fpmath;
+
/* which instruction set architecture to use. */
int ix86_arch;
/* Strings to hold which cpu and instruction set architecture to use. */
const char *ix86_cpu_string; /* for -mcpu=<xxx> */
const char *ix86_arch_string; /* for -march=<xxx> */
+ const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
/* # of registers to use to pass arguments. */
const char *ix86_regparm_string;
*************** override_options ()
*** 1027,1034 ****
if (TARGET_RTD)
error ("-mrtd calling convention not supported in the 64bit mode");
/* Enable by default the SSE and MMX builtins. */
! target_flags |= MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE;
}
/* It makes no sense to ask for just SSE builtins, so MMX is also turned
on by -msse. */
--- 1031,1075 ----
if (TARGET_RTD)
error ("-mrtd calling convention not supported in the 64bit mode");
/* Enable by default the SSE and MMX builtins. */
! target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
! ix86_fpmath = FPMATH_SSE;
}
+ else
+ ix86_fpmath = FPMATH_387;
+
+ if (ix86_fpmath_string != 0)
+ {
+ if (! strcmp (ix86_fpmath_string, "387"))
+ ix86_fpmath = FPMATH_387;
+ else if (! strcmp (ix86_fpmath_string, "sse"))
+ {
+ if (!TARGET_SSE)
+ {
+ warning ("SSE instruction set disabled, using 387 arithmetics");
+ ix86_fpmath = FPMATH_387;
+ }
+ else
+ ix86_fpmath = FPMATH_SSE;
+ }
+ else if (! strcmp (ix86_fpmath_string, "387,sse")
+ || ! strcmp (ix86_fpmath_string, "sse,387"))
+ {
+ if (!TARGET_SSE)
+ {
+ warning ("SSE instruction set disabled, using 387 arithmetics");
+ ix86_fpmath = FPMATH_387;
+ }
+ else if (!TARGET_80387)
+ {
+ warning ("387 instruction set disabled, using SSE arithmetics");
+ ix86_fpmath = FPMATH_SSE;
+ }
+ else
+ ix86_fpmath = FPMATH_SSE | FPMATH_387;
+ }
+ else
+ error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
+ }
/* It makes no sense to ask for just SSE builtins, so MMX is also turned
on by -msse. */
*************** ix86_expand_fp_movcc (operands)
*** 8078,8085 ****
/* For SF/DFmode conditional moves based on comparisons
in same mode, we may want to use SSE min/max instructions. */
! if (((TARGET_SSE && GET_MODE (operands[0]) == SFmode)
! || (TARGET_SSE2 && GET_MODE (operands[0]) == DFmode))
&& GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
/* The SSE comparisons does not support the LTGT/UNEQ pair. */
&& (!TARGET_IEEE_FP
--- 8119,8126 ----
/* For SF/DFmode conditional moves based on comparisons
in same mode, we may want to use SSE min/max instructions. */
! if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
! || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
&& GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
/* The SSE comparisons does not support the LTGT/UNEQ pair. */
&& (!TARGET_IEEE_FP
Index: config/i386/i386.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.h,v
retrieving revision 1.221
diff -c -3 -p -r1.221 i386.h
*** i386.h 2001/12/09 20:13:11 1.221
--- i386.h 2001/12/13 09:45:25
*************** extern int target_flags;
*** 123,131 ****
#define MASK_3DNOW 0x00100000 /* Support 3Dnow builtins */
#define MASK_3DNOW_A 0x00200000 /* Support Athlon 3Dnow builtins */
#define MASK_128BIT_LONG_DOUBLE 0x00400000 /* long double size is 128bit */
! #define MASK_MIX_SSE_I387 0x00800000 /* Mix SSE and i387 instructions */
! #define MASK_64BIT 0x01000000 /* Produce 64bit code */
! #define MASK_NO_RED_ZONE 0x02000000 /* Do not use red zone */
/* Temporary codegen switches */
#define MASK_INTEL_SYNTAX 0x00000200
--- 123,130 ----
#define MASK_3DNOW 0x00100000 /* Support 3Dnow builtins */
#define MASK_3DNOW_A 0x00200000 /* Support Athlon 3Dnow builtins */
#define MASK_128BIT_LONG_DOUBLE 0x00400000 /* long double size is 128bit */
! #define MASK_64BIT 0x00800000 /* Produce 64bit code */
! #define MASK_NO_RED_ZONE 0x01000000 /* Do not use red zone */
/* Temporary codegen switches */
#define MASK_INTEL_SYNTAX 0x00000200
*************** extern const int x86_epilogue_using_move
*** 267,273 ****
#define TARGET_SSE ((target_flags & (MASK_SSE | MASK_SSE2)) != 0)
#define TARGET_SSE2 ((target_flags & MASK_SSE2) != 0)
! #define TARGET_MIX_SSE_I387 ((target_flags & MASK_MIX_SSE_I387) != 0)
#define TARGET_MMX ((target_flags & MASK_MMX) != 0)
#define TARGET_3DNOW ((target_flags & MASK_3DNOW) != 0)
#define TARGET_3DNOW_A ((target_flags & MASK_3DNOW_A) != 0)
--- 266,274 ----
#define TARGET_SSE ((target_flags & (MASK_SSE | MASK_SSE2)) != 0)
#define TARGET_SSE2 ((target_flags & MASK_SSE2) != 0)
! #define TARGET_SSE_MATH ((ix86_fpmath & FPMATH_SSE) != 0)
! #define TARGET_MIX_SSE_I387 ((ix86_fpmath & FPMATH_SSE) \
! && (ix86_fpmath & FPMATH_387))
#define TARGET_MMX ((target_flags & MASK_MMX) != 0)
#define TARGET_3DNOW ((target_flags & MASK_3DNOW) != 0)
#define TARGET_3DNOW_A ((target_flags & MASK_3DNOW_A) != 0)
*************** extern const int x86_epilogue_using_move
*** 354,363 ****
N_("Support MMX, SSE and SSE2 builtins and code generation") }, \
{ "no-sse2", -MASK_SSE2, \
N_("Do not support MMX, SSE and SSE2 builtins and code generation") }, \
- { "mix-sse-i387", MASK_MIX_SSE_I387, \
- N_("Use both SSE and i387 instruction sets for floating point arithmetics") },\
- { "no-mix-sse-i387", -MASK_MIX_SSE_I387, \
- N_("Do not use both SSE and i387 instruction sets for floating point arithmetics") },\
{ "128bit-long-double", MASK_128BIT_LONG_DOUBLE, \
N_("sizeof(long double) is 16") }, \
{ "96bit-long-double", -MASK_128BIT_LONG_DOUBLE, \
--- 355,360 ----
*************** enum processor_type
*** 393,400 ****
--- 390,403 ----
PROCESSOR_PENTIUM4,
PROCESSOR_max
};
+ enum fpmath_unit
+ {
+ FPMATH_387 = 1,
+ FPMATH_SSE = 2
+ };
extern enum processor_type ix86_cpu;
+ extern enum fpmath_unit ix86_fpmath;
extern int ix86_arch;
*************** extern int ix86_arch;
*** 410,415 ****
--- 413,420 ----
#define TARGET_OPTIONS \
{ { "cpu=", &ix86_cpu_string, \
N_("Schedule code for given CPU")}, \
+ { "fpmath=", &ix86_fpmath_string, \
+ N_("Generate floating point mathematics using given instruction set")},\
{ "arch=", &ix86_arch_string, \
N_("Generate code for given CPU")}, \
{ "regparm=", &ix86_regparm_string, \
*************** enum reg_class
*** 1249,1255 ****
#define SSE_REG_P(n) (REG_P (n) && SSE_REGNO_P (REGNO (n)))
#define SSE_FLOAT_MODE_P(m) \
! ((TARGET_SSE && (m) == SFmode) || (TARGET_SSE2 && (m) == DFmode))
#define MMX_REGNO_P(n) ((n) >= FIRST_MMX_REG && (n) <= LAST_MMX_REG)
#define MMX_REG_P(xop) (REG_P (xop) && MMX_REGNO_P (REGNO (xop)))
--- 1254,1260 ----
#define SSE_REG_P(n) (REG_P (n) && SSE_REGNO_P (REGNO (n)))
#define SSE_FLOAT_MODE_P(m) \
! ((TARGET_SSE_MATH && (m) == SFmode) || (TARGET_SSE2 && (m) == DFmode))
#define MMX_REGNO_P(n) ((n) >= FIRST_MMX_REG && (n) <= LAST_MMX_REG)
#define MMX_REG_P(xop) (REG_P (xop) && MMX_REGNO_P (REGNO (xop)))
*************** extern enum cmodel ix86_cmodel;
*** 3089,3094 ****
--- 3094,3100 ----
/* Variables in i386.c */
extern const char *ix86_cpu_string; /* for -mcpu=<xxx> */
+ extern const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
extern const char *ix86_arch_string; /* for -march=<xxx> */
extern const char *ix86_regparm_string; /* # registers to use to pass args */
extern const char *ix86_align_loops_string; /* power of two alignment for loops */
Index: config/i386/i386.md
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.md,v
retrieving revision 1.321
diff -c -3 -p -r1.321 i386.md
*** i386.md 2001/12/10 14:30:11 1.321
--- i386.md 2001/12/13 09:45:30
***************
*** 2776,2782 ****
(match_operand:SF 1 "register_operand" "+f"))
(set (match_dup 1)
(match_dup 0))]
! "reload_completed || !TARGET_SSE2"
{
if (STACK_TOP_P (operands[0]))
return "fxch\t%1";
--- 2776,2782 ----
(match_operand:SF 1 "register_operand" "+f"))
(set (match_dup 1)
(match_dup 0))]
! "reload_completed || !TARGET_SSE"
{
if (STACK_TOP_P (operands[0]))
return "fxch\t%1";
***************
*** 6890,6903 ****
[(set (match_operand:DF 0 "register_operand" "")
(plus:DF (match_operand:DF 1 "register_operand" "")
(match_operand:DF 2 "nonimmediate_operand" "")))]
! "TARGET_80387 || TARGET_SSE2"
"")
(define_expand "addsf3"
[(set (match_operand:SF 0 "register_operand" "")
(plus:SF (match_operand:SF 1 "register_operand" "")
(match_operand:SF 2 "nonimmediate_operand" "")))]
! "TARGET_80387 || TARGET_SSE"
"")
;; Subtract instructions
--- 6890,6903 ----
[(set (match_operand:DF 0 "register_operand" "")
(plus:DF (match_operand:DF 1 "register_operand" "")
(match_operand:DF 2 "nonimmediate_operand" "")))]
! "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
"")
(define_expand "addsf3"
[(set (match_operand:SF 0 "register_operand" "")
(plus:SF (match_operand:SF 1 "register_operand" "")
(match_operand:SF 2 "nonimmediate_operand" "")))]
! "TARGET_80387 || TARGET_SSE_MATH"
"")
;; Subtract instructions
***************
*** 7207,7220 ****
[(set (match_operand:DF 0 "register_operand" "")
(minus:DF (match_operand:DF 1 "register_operand" "")
(match_operand:DF 2 "nonimmediate_operand" "")))]
! "TARGET_80387 || TARGET_SSE2"
"")
(define_expand "subsf3"
[(set (match_operand:SF 0 "register_operand" "")
(minus:SF (match_operand:SF 1 "register_operand" "")
(match_operand:SF 2 "nonimmediate_operand" "")))]
! "TARGET_80387 || TARGET_SSE"
"")
;; Multiply instructions
--- 7207,7220 ----
[(set (match_operand:DF 0 "register_operand" "")
(minus:DF (match_operand:DF 1 "register_operand" "")
(match_operand:DF 2 "nonimmediate_operand" "")))]
! "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
"")
(define_expand "subsf3"
[(set (match_operand:SF 0 "register_operand" "")
(minus:SF (match_operand:SF 1 "register_operand" "")
(match_operand:SF 2 "nonimmediate_operand" "")))]
! "TARGET_80387 || TARGET_SSE_MATH"
"")
;; Multiply instructions
***************
*** 7533,7546 ****
[(set (match_operand:DF 0 "register_operand" "")
(mult:DF (match_operand:DF 1 "register_operand" "")
(match_operand:DF 2 "nonimmediate_operand" "")))]
! "TARGET_80387 || TARGET_SSE2"
"")
(define_expand "mulsf3"
[(set (match_operand:SF 0 "register_operand" "")
(mult:SF (match_operand:SF 1 "register_operand" "")
(match_operand:SF 2 "nonimmediate_operand" "")))]
! "TARGET_80387 || TARGET_SSE"
"")
;; Divide instructions
--- 7533,7546 ----
[(set (match_operand:DF 0 "register_operand" "")
(mult:DF (match_operand:DF 1 "register_operand" "")
(match_operand:DF 2 "nonimmediate_operand" "")))]
! "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
"")
(define_expand "mulsf3"
[(set (match_operand:SF 0 "register_operand" "")
(mult:SF (match_operand:SF 1 "register_operand" "")
(match_operand:SF 2 "nonimmediate_operand" "")))]
! "TARGET_80387 || TARGET_SSE_MATH"
"")
;; Divide instructions
***************
*** 7587,7600 ****
[(set (match_operand:DF 0 "register_operand" "")
(div:DF (match_operand:DF 1 "register_operand" "")
(match_operand:DF 2 "nonimmediate_operand" "")))]
! "TARGET_80387 || TARGET_SSE2"
"")
(define_expand "divsf3"
[(set (match_operand:SF 0 "register_operand" "")
(div:SF (match_operand:SF 1 "register_operand" "")
(match_operand:SF 2 "nonimmediate_operand" "")))]
! "TARGET_80387 || TARGET_SSE"
"")
;; Remainder instructions.
--- 7587,7600 ----
[(set (match_operand:DF 0 "register_operand" "")
(div:DF (match_operand:DF 1 "register_operand" "")
(match_operand:DF 2 "nonimmediate_operand" "")))]
! "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
"")
(define_expand "divsf3"
[(set (match_operand:SF 0 "register_operand" "")
(div:SF (match_operand:SF 1 "register_operand" "")
(match_operand:SF 2 "nonimmediate_operand" "")))]
! "TARGET_80387 || TARGET_SSE_MATH"
"")
;; Remainder instructions.
***************
*** 13849,13860 ****
;; Gcc is slightly more smart about handling normal two address instructions
;; so use special patterns for add and mull.
(define_insn "*fop_sf_comm"
[(set (match_operand:SF 0 "register_operand" "=f#x,x#f")
(match_operator:SF 3 "binary_fp_operator"
[(match_operand:SF 1 "register_operand" "%0,0")
(match_operand:SF 2 "nonimmediate_operand" "fm#x,xm#f")]))]
! "TARGET_80387 && (!TARGET_SSE || TARGET_MIX_SSE_I387)
&& GET_RTX_CLASS (GET_CODE (operands[3])) == 'c'"
"* return output_387_binary_op (insn, operands);"
[(set (attr "type")
--- 13849,13874 ----
;; Gcc is slightly more smart about handling normal two address instructions
;; so use special patterns for add and mull.
+ (define_insn "*fop_sf_comm_nosse"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (match_operator:SF 3 "binary_fp_operator"
+ [(match_operand:SF 1 "register_operand" "%0")
+ (match_operand:SF 2 "nonimmediate_operand" "fm")]))]
+ "TARGET_80387 && !TARGET_SSE_MATH
+ && GET_RTX_CLASS (GET_CODE (operands[3])) == 'c'"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (if_then_else (match_operand:SF 3 "mult_operator" "")
+ (const_string "fmul")
+ (const_string "fop")))
+ (set_attr "mode" "SF")])
+
(define_insn "*fop_sf_comm"
[(set (match_operand:SF 0 "register_operand" "=f#x,x#f")
(match_operator:SF 3 "binary_fp_operator"
[(match_operand:SF 1 "register_operand" "%0,0")
(match_operand:SF 2 "nonimmediate_operand" "fm#x,xm#f")]))]
! "TARGET_80387 && TARGET_SSE_MATH && TARGET_MIX_SSE_I387
&& GET_RTX_CLASS (GET_CODE (operands[3])) == 'c'"
"* return output_387_binary_op (insn, operands);"
[(set (attr "type")
***************
*** 13870,13886 ****
(match_operator:SF 3 "binary_fp_operator"
[(match_operand:SF 1 "register_operand" "%0")
(match_operand:SF 2 "nonimmediate_operand" "xm")]))]
! "TARGET_SSE && GET_RTX_CLASS (GET_CODE (operands[3])) == 'c'"
"* return output_387_binary_op (insn, operands);"
[(set_attr "type" "sse")
(set_attr "mode" "SF")])
(define_insn "*fop_df_comm"
[(set (match_operand:DF 0 "register_operand" "=f#Y,Y#f")
(match_operator:DF 3 "binary_fp_operator"
[(match_operand:DF 1 "register_operand" "%0,0")
(match_operand:DF 2 "nonimmediate_operand" "fm#Y,Ym#f")]))]
! "TARGET_80387 && (!TARGET_SSE2 || TARGET_MIX_SSE_I387)
&& GET_RTX_CLASS (GET_CODE (operands[3])) == 'c'"
"* return output_387_binary_op (insn, operands);"
[(set (attr "type")
--- 13884,13914 ----
(match_operator:SF 3 "binary_fp_operator"
[(match_operand:SF 1 "register_operand" "%0")
(match_operand:SF 2 "nonimmediate_operand" "xm")]))]
! "TARGET_SSE_MATH && GET_RTX_CLASS (GET_CODE (operands[3])) == 'c'"
"* return output_387_binary_op (insn, operands);"
[(set_attr "type" "sse")
(set_attr "mode" "SF")])
+ (define_insn "*fop_df_comm_nosse"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (match_operator:DF 3 "binary_fp_operator"
+ [(match_operand:DF 1 "register_operand" "%0")
+ (match_operand:DF 2 "nonimmediate_operand" "fm")]))]
+ "TARGET_80387 && (!TARGET_SSE2 || !TARGET_SSE_MATH)
+ && GET_RTX_CLASS (GET_CODE (operands[3])) == 'c'"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (if_then_else (match_operand:SF 3 "mult_operator" "")
+ (const_string "fmul")
+ (const_string "fop")))
+ (set_attr "mode" "DF")])
+
(define_insn "*fop_df_comm"
[(set (match_operand:DF 0 "register_operand" "=f#Y,Y#f")
(match_operator:DF 3 "binary_fp_operator"
[(match_operand:DF 1 "register_operand" "%0,0")
(match_operand:DF 2 "nonimmediate_operand" "fm#Y,Ym#f")]))]
! "TARGET_80387 && TARGET_SSE_MATH && TARGET_SSE2 && TARGET_MIX_SSE_I387
&& GET_RTX_CLASS (GET_CODE (operands[3])) == 'c'"
"* return output_387_binary_op (insn, operands);"
[(set (attr "type")
***************
*** 13896,13902 ****
(match_operator:DF 3 "binary_fp_operator"
[(match_operand:DF 1 "register_operand" "%0")
(match_operand:DF 2 "nonimmediate_operand" "Ym")]))]
! "TARGET_SSE2
&& GET_RTX_CLASS (GET_CODE (operands[3])) == 'c'"
"* return output_387_binary_op (insn, operands);"
[(set_attr "type" "sse")
--- 13924,13930 ----
(match_operator:DF 3 "binary_fp_operator"
[(match_operand:DF 1 "register_operand" "%0")
(match_operand:DF 2 "nonimmediate_operand" "Ym")]))]
! "TARGET_SSE2 && TARGET_SSE_MATH
&& GET_RTX_CLASS (GET_CODE (operands[3])) == 'c'"
"* return output_387_binary_op (insn, operands);"
[(set_attr "type" "sse")
***************
*** 13929,13940 ****
(const_string "fop")))
(set_attr "mode" "XF")])
(define_insn "*fop_sf_1"
[(set (match_operand:SF 0 "register_operand" "=f,f,x")
(match_operator:SF 3 "binary_fp_operator"
[(match_operand:SF 1 "nonimmediate_operand" "0,fm,0")
(match_operand:SF 2 "nonimmediate_operand" "fm,0,xm#f")]))]
! "TARGET_80387 && (!TARGET_SSE || TARGET_MIX_SSE_I387)
&& GET_RTX_CLASS (GET_CODE (operands[3])) != 'c'
&& (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
"* return output_387_binary_op (insn, operands);"
--- 13957,13986 ----
(const_string "fop")))
(set_attr "mode" "XF")])
+ (define_insn "*fop_sf_1_nosse"
+ [(set (match_operand:SF 0 "register_operand" "=f,f")
+ (match_operator:SF 3 "binary_fp_operator"
+ [(match_operand:SF 1 "nonimmediate_operand" "0,fm")
+ (match_operand:SF 2 "nonimmediate_operand" "fm,0")]))]
+ "TARGET_80387 && !TARGET_SSE_MATH
+ && GET_RTX_CLASS (GET_CODE (operands[3])) != 'c'
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:SF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:SF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "mode" "SF")])
+
(define_insn "*fop_sf_1"
[(set (match_operand:SF 0 "register_operand" "=f,f,x")
(match_operator:SF 3 "binary_fp_operator"
[(match_operand:SF 1 "nonimmediate_operand" "0,fm,0")
(match_operand:SF 2 "nonimmediate_operand" "fm,0,xm#f")]))]
! "TARGET_80387 && TARGET_SSE_MATH && TARGET_MIX_SSE_I387
&& GET_RTX_CLASS (GET_CODE (operands[3])) != 'c'
&& (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
"* return output_387_binary_op (insn, operands);"
***************
*** 13954,13960 ****
(match_operator:SF 3 "binary_fp_operator"
[(match_operand:SF 1 "register_operand" "0")
(match_operand:SF 2 "nonimmediate_operand" "xm")]))]
! "TARGET_SSE
&& GET_RTX_CLASS (GET_CODE (operands[3])) != 'c'"
"* return output_387_binary_op (insn, operands);"
[(set_attr "type" "sse")
--- 14000,14006 ----
(match_operator:SF 3 "binary_fp_operator"
[(match_operand:SF 1 "register_operand" "0")
(match_operand:SF 2 "nonimmediate_operand" "xm")]))]
! "TARGET_SSE_MATH
&& GET_RTX_CLASS (GET_CODE (operands[3])) != 'c'"
"* return output_387_binary_op (insn, operands);"
[(set_attr "type" "sse")
***************
*** 13966,13972 ****
(match_operator:SF 3 "binary_fp_operator"
[(float:SF (match_operand:SI 1 "nonimmediate_operand" "m,?r"))
(match_operand:SF 2 "register_operand" "0,0")]))]
! "TARGET_80387 && TARGET_USE_FIOP && !TARGET_SSE"
"* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
[(set (attr "type")
(cond [(match_operand:SF 3 "mult_operator" "")
--- 14012,14018 ----
(match_operator:SF 3 "binary_fp_operator"
[(float:SF (match_operand:SI 1 "nonimmediate_operand" "m,?r"))
(match_operand:SF 2 "register_operand" "0,0")]))]
! "TARGET_80387 && TARGET_USE_FIOP && !TARGET_SSE_MATH"
"* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
[(set (attr "type")
(cond [(match_operand:SF 3 "mult_operator" "")
***************
*** 13984,13990 ****
(match_operator:SF 3 "binary_fp_operator"
[(match_operand:SF 1 "register_operand" "0,0")
(float:SF (match_operand:SI 2 "nonimmediate_operand" "m,?r"))]))]
! "TARGET_80387 && TARGET_USE_FIOP && !TARGET_SSE"
"* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
[(set (attr "type")
(cond [(match_operand:SF 3 "mult_operator" "")
--- 14030,14036 ----
(match_operator:SF 3 "binary_fp_operator"
[(match_operand:SF 1 "register_operand" "0,0")
(float:SF (match_operand:SI 2 "nonimmediate_operand" "m,?r"))]))]
! "TARGET_80387 && TARGET_USE_FIOP && !TARGET_SSE_MATH"
"* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
[(set (attr "type")
(cond [(match_operand:SF 3 "mult_operator" "")
***************
*** 13997,14008 ****
(set_attr "ppro_uops" "many")
(set_attr "mode" "SI")])
(define_insn "*fop_df_1"
[(set (match_operand:DF 0 "register_operand" "=f#Y,f#Y,Y#f")
(match_operator:DF 3 "binary_fp_operator"
[(match_operand:DF 1 "nonimmediate_operand" "0,fm,0")
(match_operand:DF 2 "nonimmediate_operand" "fm,0,Ym#f")]))]
! "TARGET_80387 && (!TARGET_SSE2 || TARGET_MIX_SSE_I387)
&& GET_RTX_CLASS (GET_CODE (operands[3])) != 'c'
&& (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
"* return output_387_binary_op (insn, operands);"
--- 14043,14073 ----
(set_attr "ppro_uops" "many")
(set_attr "mode" "SI")])
+ (define_insn "*fop_df_1_nosse"
+ [(set (match_operand:DF 0 "register_operand" "=f,f")
+ (match_operator:DF 3 "binary_fp_operator"
+ [(match_operand:DF 1 "nonimmediate_operand" "0,fm")
+ (match_operand:DF 2 "nonimmediate_operand" "fm,0")]))]
+ "TARGET_80387 && (!TARGET_SSE2 || !TARGET_SSE_MATH)
+ && GET_RTX_CLASS (GET_CODE (operands[3])) != 'c'
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:DF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:DF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "mode" "DF")])
+
+
(define_insn "*fop_df_1"
[(set (match_operand:DF 0 "register_operand" "=f#Y,f#Y,Y#f")
(match_operator:DF 3 "binary_fp_operator"
[(match_operand:DF 1 "nonimmediate_operand" "0,fm,0")
(match_operand:DF 2 "nonimmediate_operand" "fm,0,Ym#f")]))]
! "TARGET_80387 && TARGET_SSE2 && TARGET_SSE_MATH && TARGET_MIX_SSE_I387
&& GET_RTX_CLASS (GET_CODE (operands[3])) != 'c'
&& (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
"* return output_387_binary_op (insn, operands);"
***************
*** 14022,14028 ****
(match_operator:DF 3 "binary_fp_operator"
[(match_operand:DF 1 "register_operand" "0")
(match_operand:DF 2 "nonimmediate_operand" "Ym")]))]
! "TARGET_SSE
&& GET_RTX_CLASS (GET_CODE (operands[3])) != 'c'"
"* return output_387_binary_op (insn, operands);"
[(set_attr "type" "sse")])
--- 14087,14093 ----
(match_operator:DF 3 "binary_fp_operator"
[(match_operand:DF 1 "register_operand" "0")
(match_operand:DF 2 "nonimmediate_operand" "Ym")]))]
! "TARGET_SSE2 && TARGET_SSE_MATH
&& GET_RTX_CLASS (GET_CODE (operands[3])) != 'c'"
"* return output_387_binary_op (insn, operands);"
[(set_attr "type" "sse")])
***************
*** 14033,14039 ****
(match_operator:DF 3 "binary_fp_operator"
[(float:DF (match_operand:SI 1 "nonimmediate_operand" "m,?r"))
(match_operand:DF 2 "register_operand" "0,0")]))]
! "TARGET_80387 && TARGET_USE_FIOP && !TARGET_SSE2"
"* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
[(set (attr "type")
(cond [(match_operand:DF 3 "mult_operator" "")
--- 14098,14104 ----
(match_operator:DF 3 "binary_fp_operator"
[(float:DF (match_operand:SI 1 "nonimmediate_operand" "m,?r"))
(match_operand:DF 2 "register_operand" "0,0")]))]
! "TARGET_80387 && TARGET_USE_FIOP && !(TARGET_SSE2 && TARGET_SSE_MATH)"
"* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
[(set (attr "type")
(cond [(match_operand:DF 3 "mult_operator" "")
***************
*** 14051,14057 ****
(match_operator:DF 3 "binary_fp_operator"
[(match_operand:DF 1 "register_operand" "0,0")
(float:DF (match_operand:SI 2 "nonimmediate_operand" "m,?r"))]))]
! "TARGET_80387 && TARGET_USE_FIOP && !TARGET_SSE2"
"* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
[(set (attr "type")
(cond [(match_operand:DF 3 "mult_operator" "")
--- 14116,14122 ----
(match_operator:DF 3 "binary_fp_operator"
[(match_operand:DF 1 "register_operand" "0,0")
(float:DF (match_operand:SI 2 "nonimmediate_operand" "m,?r"))]))]
! "TARGET_80387 && TARGET_USE_FIOP && !(TARGET_SSE2 && TARGET_SSE_MATH)"
"* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
[(set (attr "type")
(cond [(match_operand:DF 3 "mult_operator" "")
***************
*** 14087,14093 ****
[(match_operand:DF 1 "register_operand" "0,f")
(float_extend:DF
(match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))]
! "TARGET_80387 && !TARGET_SSE2"
"* return output_387_binary_op (insn, operands);"
[(set (attr "type")
(cond [(match_operand:DF 3 "mult_operator" "")
--- 14152,14158 ----
[(match_operand:DF 1 "register_operand" "0,f")
(float_extend:DF
(match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))]
! "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH)"
"* return output_387_binary_op (insn, operands);"
[(set (attr "type")
(cond [(match_operand:DF 3 "mult_operator" "")
***************
*** 14421,14429 ****
(define_expand "sqrtdf2"
[(set (match_operand:DF 0 "register_operand" "")
(sqrt:DF (match_operand:DF 1 "nonimmediate_operand" "")))]
! "(! TARGET_NO_FANCY_MATH_387 && TARGET_80387) || TARGET_SSE2"
{
! if (!TARGET_SSE2)
operands[1] = force_reg (DFmode, operands[1]);
})
--- 14486,14495 ----
(define_expand "sqrtdf2"
[(set (match_operand:DF 0 "register_operand" "")
(sqrt:DF (match_operand:DF 1 "nonimmediate_operand" "")))]
! "(! TARGET_NO_FANCY_MATH_387 && TARGET_80387)
! || (TARGET_SSE2 && TARGET_SSE_MATH)"
{
! if (!TARGET_SSE2 || !TARGET_SSE_MATH)
operands[1] = force_reg (DFmode, operands[1]);
})
***************
*** 14431,14437 ****
[(set (match_operand:DF 0 "register_operand" "=f#Y,Y#f")
(sqrt:DF (match_operand:DF 1 "nonimmediate_operand" "0#Y,Ym#f")))]
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387
! && (TARGET_SSE2 && TARGET_MIX_SSE_I387)"
"@
fsqrt
sqrtsd\t{%1, %0|%0, %1}"
--- 14497,14503 ----
[(set (match_operand:DF 0 "register_operand" "=f#Y,Y#f")
(sqrt:DF (match_operand:DF 1 "nonimmediate_operand" "0#Y,Ym#f")))]
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387
! && (TARGET_SSE2 && TARGET_SSE_MATH && TARGET_MIX_SSE_I387)"
"@
fsqrt
sqrtsd\t{%1, %0|%0, %1}"
***************
*** 14442,14448 ****
(define_insn "sqrtdf2_1_sse_only"
[(set (match_operand:DF 0 "register_operand" "=Y")
(sqrt:DF (match_operand:DF 1 "nonimmediate_operand" "Ym")))]
! "TARGET_SSE2 && (!TARGET_80387 || !TARGET_MIX_SSE_I387)"
"sqrtsd\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
(set_attr "mode" "DF")
--- 14508,14514 ----
(define_insn "sqrtdf2_1_sse_only"
[(set (match_operand:DF 0 "register_operand" "=Y")
(sqrt:DF (match_operand:DF 1 "nonimmediate_operand" "Ym")))]
! "TARGET_SSE2 && TARGET_SSE_MATH && (!TARGET_80387 || !TARGET_MIX_SSE_I387)"
"sqrtsd\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
(set_attr "mode" "DF")
***************
*** 14452,14458 ****
[(set (match_operand:DF 0 "register_operand" "=f")
(sqrt:DF (match_operand:DF 1 "register_operand" "0")))]
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387
! && (!TARGET_SSE2 && !TARGET_MIX_SSE_I387)"
"fsqrt"
[(set_attr "type" "fpspc")
(set_attr "mode" "DF")
--- 14518,14524 ----
[(set (match_operand:DF 0 "register_operand" "=f")
(sqrt:DF (match_operand:DF 1 "register_operand" "0")))]
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387
! && (!TARGET_SSE2 && TARGET_SSE_MATH && !TARGET_MIX_SSE_I387)"
"fsqrt"
[(set_attr "type" "fpspc")
(set_attr "mode" "DF")
***************
*** 14462,14468 ****
[(set (match_operand:DF 0 "register_operand" "=f")
(sqrt:DF (float_extend:DF
(match_operand:SF 1 "register_operand" "0"))))]
! "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 && !TARGET_SSE2"
"fsqrt"
[(set_attr "type" "fpspc")
(set_attr "mode" "DF")
--- 14528,14535 ----
[(set (match_operand:DF 0 "register_operand" "=f")
(sqrt:DF (float_extend:DF
(match_operand:SF 1 "register_operand" "0"))))]
! "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
! && !(TARGET_SSE2 && TARGET_SSE_MATH)"
"fsqrt"
[(set_attr "type" "fpspc")
(set_attr "mode" "DF")
***************
*** 15963,15969 ****
(match_dup 1)
(match_dup 2)))
(clobber (reg:CC 17))])]
! "TARGET_SSE2"
"#")
(define_insn "*mindf"
--- 16030,16036 ----
(match_dup 1)
(match_dup 2)))
(clobber (reg:CC 17))])]
! "TARGET_SSE2 && TARGET_SSE_MATH"
"#")
(define_insn "*mindf"
***************
*** 15973,15979 ****
(match_dup 1)
(match_dup 2)))
(clobber (reg:CC 17))]
! "TARGET_SSE2 && TARGET_IEEE_FP"
"#")
(define_insn "*mindf_nonieee"
--- 16040,16046 ----
(match_dup 1)
(match_dup 2)))
(clobber (reg:CC 17))]
! "TARGET_SSE2 && TARGET_IEEE_FP && TARGET_SSE_MATH"
"#")
(define_insn "*mindf_nonieee"
***************
*** 15983,15989 ****
(match_dup 1)
(match_dup 2)))
(clobber (reg:CC 17))]
! "TARGET_SSE2 && !TARGET_IEEE_FP"
"#")
(define_split
--- 16050,16056 ----
(match_dup 1)
(match_dup 2)))
(clobber (reg:CC 17))]
! "TARGET_SSE2 && TARGET_SSE_MATH && !TARGET_IEEE_FP"
"#")
(define_split
***************
*** 16031,16037 ****
(match_operand:DF 2 "nonimmediate_operand" "Ym"))
(match_dup 1)
(match_dup 2)))]
! "TARGET_SSE2 && reload_completed"
"minsd\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")
(set_attr "mode" "DF")])
--- 16098,16104 ----
(match_operand:DF 2 "nonimmediate_operand" "Ym"))
(match_dup 1)
(match_dup 2)))]
! "TARGET_SSE2 && TARGET_SSE_MATH && reload_completed"
"minsd\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")
(set_attr "mode" "DF")])
***************
*** 16124,16130 ****
(match_dup 1)
(match_dup 2)))
(clobber (reg:CC 17))])]
! "TARGET_SSE2"
"#")
(define_insn "*maxdf"
--- 16191,16197 ----
(match_dup 1)
(match_dup 2)))
(clobber (reg:CC 17))])]
! "TARGET_SSE2 && TARGET_SSE_MATH"
"#")
(define_insn "*maxdf"
***************
*** 16134,16140 ****
(match_dup 1)
(match_dup 2)))
(clobber (reg:CC 17))]
! "TARGET_SSE2 && TARGET_IEEE_FP"
"#")
(define_insn "*maxdf_nonieee"
--- 16201,16207 ----
(match_dup 1)
(match_dup 2)))
(clobber (reg:CC 17))]
! "TARGET_SSE2 && TARGET_SSE_MATH && TARGET_IEEE_FP"
"#")
(define_insn "*maxdf_nonieee"
***************
*** 16144,16150 ****
(match_dup 1)
(match_dup 2)))
(clobber (reg:CC 17))]
! "TARGET_SSE2 && !TARGET_IEEE_FP"
"#")
(define_split
--- 16211,16217 ----
(match_dup 1)
(match_dup 2)))
(clobber (reg:CC 17))]
! "TARGET_SSE2 && TARGET_SSE_MATH && !TARGET_IEEE_FP"
"#")
(define_split
***************
*** 16191,16197 ****
(match_operand:DF 2 "nonimmediate_operand" "Ym"))
(match_dup 1)
(match_dup 2)))]
! "TARGET_SSE2 && reload_completed"
"maxsd\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")
(set_attr "mode" "DF")])
--- 16258,16264 ----
(match_operand:DF 2 "nonimmediate_operand" "Ym"))
(match_dup 1)
(match_dup 2)))]
! "TARGET_SSE2 && TARGET_SSE_MATH && reload_completed"
"maxsd\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")
(set_attr "mode" "DF")])
Index: doc/invoke.texi
===================================================================
RCS file: /cvs/gcc/egcs/gcc/doc/invoke.texi,v
retrieving revision 1.89
diff -c -3 -p -r1.89 invoke.texi
*** invoke.texi 2001/12/12 22:50:05 1.89
--- invoke.texi 2001/12/13 09:45:33
*************** in the following sections.
*** 473,484 ****
@emph{i386 and x86-64 Options}
@gccoptlist{
! -mcpu=@var{cpu-type} -march=@var{cpu-type} @gol
-mintel-syntax -mieee-fp -mno-fancy-math-387 @gol
-mno-fp-ret-in-387 -msoft-float -msvr3-shlib @gol
-mno-wide-multiply -mrtd -malign-double @gol
-mpreferred-stack-boundary=@var{num} @gol
! -mmmx -msse -m3dnow @gol
-mthreads -mno-align-stringops -minline-all-stringops @gol
-mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol
-m96bit-long-double -mregparm=@var{num} -momit-leaf-frame-pointer @gol
--- 473,484 ----
@emph{i386 and x86-64 Options}
@gccoptlist{
! -mcpu=@var{cpu-type} -march=@var{cpu-type} -mfpmath=@var{unit} @gol
-mintel-syntax -mieee-fp -mno-fancy-math-387 @gol
-mno-fp-ret-in-387 -msoft-float -msvr3-shlib @gol
-mno-wide-multiply -mrtd -malign-double @gol
-mpreferred-stack-boundary=@var{num} @gol
! -mmmx -msse -msse2 -msse-math -m3dnow @gol
-mthreads -mno-align-stringops -minline-all-stringops @gol
-mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol
-m96bit-long-double -mregparm=@var{num} -momit-leaf-frame-pointer @gol
*************** These options are synonyms for @option{-
*** 7506,7511 ****
--- 7506,7553 ----
@option{-mcpu=pentium}, and @option{-mcpu=pentiumpro} respectively.
These synonyms are deprecated.
+ @item -mfpmath=@var{unit}
+ @opindex mfpmath
+ Generate floating point arithmetics for selected unit @var{unit}. The choices
+ for @var{unit} are:
+
+ @table @samp
+ @item 387
+ Use the standard 387 floating point coprocessor present on the majority of chips and
+ emulated otherwise. Code compiled with this option will run almost everywhere.
+ The temporary results are computed in 80bit precision instead of the precision
+ specified by the type, resulting in slightly different results compared to most
+ other chips. See @option{-ffloat-store} for a more detailed description.
+
+ This is the default choice for i386 compiler.
+
+ @item sse
+ Use scalar floating point instructions present in the SSE instruction set.
+ This instruction set is supported by Pentium3 and newer chips, in the AMD line
+ by Athlon-4, Athlon-xp and Athlon-mp chips. The earlier version of the SSE
+ instruction set supports only single precision arithmetics, thus the double and
+ extended precision arithmetics are still done using 387. A later version, present
+ only in Pentium4 and the future AMD x86-64 chips, supports double precision
+ arithmetics too.
+
+ For the i386 compiler, you need to use @option{-march=@var{cpu-type}}, @option{-msse} or
+ @option{-msse2} switches to enable SSE extensions and make this option
+ effective. For x86-64 compiler, these extensions are enabled by default.
+
+ The resulting code should be considerably faster in majority of cases and avoid
+ the numerical instability problems of 387 code, but may break some existing
+ code that expects temporaries to be 80bit.
+
+ This is the default choice for x86-64 compiler.
+
+ @item sse,387
+ Attempt to utilize both instruction sets at once. This effectively doubles the
+ amount of available registers and, on chips with separate execution units for
+ 387 and SSE, the execution resources too. Use this option with care, as it is
+ still experimental, because the gcc register allocator does not model separate
+ functional units well, resulting in unstable performance.
+ @end table
+
@item -mintel-syntax
@opindex mintel-syntax
Emit assembly using Intel syntax opcodes instead of AT&T syntax.
*************** preferred alignment to @option{-mpreferr
*** 7656,7661 ****
--- 7698,7705 ----
@itemx -mno-mmx
@item -msse
@itemx -mno-sse
+ @item -msse2
+ @itemx -mno-sse2
@item -m3dnow
@itemx -mno-3dnow
@opindex mmmx