/* Subroutines used for code generation on IA-32.
Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
- 2002, 2003, 2004 Free Software Foundation, Inc.
+ 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
This file is part of GCC.
else
emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
- op0 = gen_lowpart (V4SFmode, op0);
- m = adjust_address (op1, V4SFmode, 0);
- emit_insn (gen_sse_movlps (op0, op0, m));
- m = adjust_address (op1, V4SFmode, 8);
- emit_insn (gen_sse_movhps (op0, op0, m));
+ m = adjust_address (op1, V2SFmode, 0);
+ emit_insn (gen_sse_loadlps (op0, op0, m));
+ m = adjust_address (op1, V2SFmode, 8);
+ emit_insn (gen_sse_loadhps (op0, op0, m));
}
}
else if (MEM_P (op0))
}
else
{
- op1 = gen_lowpart (V4SFmode, op1);
- m = adjust_address (op0, V4SFmode, 0);
- emit_insn (gen_sse_movlps (m, m, op1));
- m = adjust_address (op0, V4SFmode, 8);
- emit_insn (gen_sse_movhps (m, m, op1));
+ m = adjust_address (op0, V2SFmode, 0);
+ emit_insn (gen_sse_storelps (m, op1));
+ m = adjust_address (op0, V2SFmode, 8);
+ emit_insn (gen_sse_storehps (m, op1));
return;
}
}
case IX86_BUILTIN_LOADLPS:
case IX86_BUILTIN_LOADHPD:
case IX86_BUILTIN_LOADLPD:
- icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
- : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
+ icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
+ : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
: fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
: CODE_FOR_sse2_loadlpd);
arg0 = TREE_VALUE (arglist);
case IX86_BUILTIN_STOREHPS:
case IX86_BUILTIN_STORELPS:
- icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
- : CODE_FOR_sse_movlps);
- arg0 = TREE_VALUE (arglist);
- arg1 = TREE_VALUE (TREE_CHAIN (arglist));
- op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
- op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
- mode0 = insn_data[icode].operand[1].mode;
- mode1 = insn_data[icode].operand[2].mode;
-
- op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
- if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
-
- pat = GEN_FCN (icode) (op0, op0, op1);
- if (! pat)
- return 0;
- emit_insn (pat);
- return const0_rtx;
-
case IX86_BUILTIN_STOREHPD:
case IX86_BUILTIN_STORELPD:
- icode = (fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_storehpd
+ icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
+ : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_storelps
+ : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_storehpd
: CODE_FOR_sse2_storelpd);
arg0 = TREE_VALUE (arglist);
arg1 = TREE_VALUE (TREE_CHAIN (arglist));
;; GCC machine description for IA-32 and x86-64.
;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
-;; 2001, 2002, 2003, 2004
+;; 2001, 2002, 2003, 2004, 2005
;; Free Software Foundation, Inc.
;; Mostly by William Schelter.
;; x86_64 support added by Jan Hubicka
[(set_attr "type" "ssecvt")
(set_attr "mode" "V4SF")])
-(define_insn "sse_movhps"
- [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
- (vec_merge:V4SF
- (match_operand:V4SF 1 "nonimmediate_operand" "0,0")
- (match_operand:V4SF 2 "nonimmediate_operand" "m,x")
- (const_int 12)))]
- "TARGET_SSE
- && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
- "movhps\t{%2, %0|%0, %2}"
+;; Store the high V2SF of the source vector to the destination.
+;; The first two alternatives are a store to memory (movhps) and a
+;; register-to-register move of the high half into the low half
+;; (movhlps).  The third alternative (register destination, offsettable
+;; memory source) has "#" as its template; the define_split that follows
+;; this pattern matches that form after reload.
+(define_insn "sse_storehps"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
+ (parallel [(const_int 2) (const_int 3)])))]
+ "TARGET_SSE"
+ "@
+ movhps\t{%1, %0|%0, %1}
+ movhlps\t{%1, %0|%0, %1}
+ #"
[(set_attr "type" "ssecvt")
- (set_attr "mode" "V4SF")])
+ (set_attr "mode" "V2SF")])
-(define_insn "sse_movlps"
- [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
- (vec_merge:V4SF
- (match_operand:V4SF 1 "nonimmediate_operand" "0,0")
- (match_operand:V4SF 2 "nonimmediate_operand" "m,x")
- (const_int 3)))]
- "TARGET_SSE
- && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
- "movlps\t{%2, %0|%0, %2}"
+;; After reload, extracting elements 2 and 3 of a V4SF that lives in
+;; memory into a register is rewritten as a plain V2SF move from byte
+;; offset 8 of that memory operand.
+(define_split
+ [(set (match_operand:V2SF 0 "register_operand" "")
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "memory_operand" "")
+ (parallel [(const_int 2) (const_int 3)])))]
+ "TARGET_SSE && reload_completed"
+ [(const_int 0)]
+{
+ emit_move_insn (operands[0], adjust_address (operands[1], V2SFmode, 8));
+ DONE;
+})
+
+;; Load the high V2SF of the target vector from the source vector.
+;; Operand 1 is tied to operand 0 in every alternative, so the low half
+;; (elements 0 and 1) of the destination is kept and only the high half
+;; is replaced by operand 2.  The third alternative (offsettable memory
+;; destination, register source) has "#" as its template; the
+;; define_split that follows this pattern matches that form after reload.
+(define_insn "sse_loadhps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
+ (vec_concat:V4SF
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
+ "TARGET_SSE"
+ "@
+ movhps\t{%2, %0|%0, %2}
+ movlhps\t{%2, %0|%0, %2}
+ #"
[(set_attr "type" "ssecvt")
- (set_attr "mode" "V4SF")])
+ (set_attr "mode" "V2SF")])
+
+;; After reload, replacing the high half of a V4SF that lives in memory
+;; is rewritten as a plain V2SF store at byte offset 8 of that memory.
+;; The memory destination also serves as the vector whose low half is
+;; preserved (match_dup 0), so the only other operand is the V2SF source
+;; register.  It is numbered 1 so that the operands[] index used in the
+;; preparation statements refers to an operand this pattern actually
+;; binds.
+(define_split
+ [(set (match_operand:V4SF 0 "memory_operand" "")
+ (vec_concat:V4SF
+ (vec_select:V2SF
+ (match_dup 0)
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:V2SF 1 "register_operand" "")))]
+ "TARGET_SSE && reload_completed"
+ [(const_int 0)]
+{
+ emit_move_insn (adjust_address (operands[0], V2SFmode, 8), operands[1]);
+ DONE;
+})
+
+;; Store the low V2SF of the source vector to the destination.
+(define_expand "sse_storelps"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand" "")
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 1)])))]
+ "TARGET_SSE"
+{
+ /* Elements 0 and 1 are taken as the V2SF low part of the source, so
+    the expander emits an ordinary V2SF move and finishes (DONE) without
+    generating the vec_select pattern above.  */
+ operands[1] = gen_lowpart (V2SFmode, operands[1]);
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+})
+
+;; Load the low V2SF of the target vector from the source vector.
+;; The result is operand 2 in elements 0-1 followed by elements 2-3 of
+;; operand 1.  Alternatives: (0) register tied to operand 1, new low half
+;; from memory; (1) register tied to operand 2, high half taken from
+;; another register; (2) memory destination tied to operand 1, new low
+;; half from a register.
+(define_insn "sse_loadlps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
+ (vec_concat:V4SF
+ (match_operand:V2SF 2 "nonimmediate_operand" "m,0,x")
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "0,x,0")
+ (parallel [(const_int 2) (const_int 3)]))))]
+ "TARGET_SSE"
+{
+ /* Output templates, indexed by which_alternative.  */
+ static const char * const alt[] = {
+ "movlps\t{%2, %0|%0, %2}",
+ "shufps\t{%2, %1, %0|%0, %1, %2}",
+ "movlps\t{%2, %0|%0, %2}"
+ };
+
+ /* In alternative 1 operand 2 is tied to operand 0, so its slot is
+    reused to carry the shufps immediate printed as %2.
+    NOTE(review): 0xe4 is 0b11100100, i.e. 2-bit selector fields
+    0,1,2,3 from low to high -- presumably the selector that keeps each
+    element in its own position; confirm against the shufps encoding.  */
+ if (which_alternative == 1)
+ operands[2] = GEN_INT (0xe4);
+
+ return alt[which_alternative];
+}
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V2SF")])
(define_expand "sse_loadss"
[(match_operand:V4SF 0 "register_operand" "")
[(set (match_operand:V4SF 0 "register_operand" "=x")
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
(match_operand:V4SF 2 "nonimmediate_operand" "xm")
- (match_operand:SI 3 "immediate_operand" "i")]
+ (match_operand:SI 3 "const_int_operand" "n")]
UNSPEC_SHUFFLE))]
"TARGET_SSE"
- ;; @@@ check operand order for intel/nonintel syntax
"shufps\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "type" "ssecvt")
(set_attr "mode" "V4SF")])
[(set (match_operand:DF 0 "nonimmediate_operand" "")
(vec_select:DF
(match_operand:V2DF 1 "nonimmediate_operand" "")
- (parallel [(const_int 1)])))]
+ (parallel [(const_int 0)])))]
"TARGET_SSE2"
{
operands[1] = gen_lowpart (DFmode, operands[1]);
DONE;
})
-;; Load the load double of the target vector from the source scalar.
+;; Load the low double of the target vector from the source scalar.
(define_insn "sse2_loadlpd"
[(set (match_operand:V2DF 0 "nonimmediate_operand" "=Y,Y,m")
(vec_concat:V2DF