This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[bfin] Add more new Blackfin builtins
- From: Jie Zhang <jzhang918 at gmail dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Cc: Bernd Schmidt <bernds_cb1 at t-online dot de>
- Date: Sat, 25 Aug 2007 01:13:50 +0800
- Subject: [bfin] Add more new Blackfin builtins
This patch adds more Blackfin builtins to GCC: __builtin_bfin_ones,
__builtin_bfin_cmplx_mul_s40, __builtin_bfin_cmplx_mac_s40,
__builtin_bfin_cmplx_msu_s40, and __builtin_bfin_csqu_fr16.
It also adds __builtin_bfin_cmplx_add and __builtin_bfin_cmplx_sub as
aliases to __builtin_bfin_add_fr2x16 and __builtin_bfin_sub_fr2x16
respectively.
It also enables __builtin_bfin_min_fr1x16, __builtin_bfin_max_fr1x16,
__builtin_bfin_min_fr1x32 and __builtin_bfin_max_fr1x32, whose
implementations already exist in GCC.
OK to commit?
Jie
* config/bfin/bfin.c (enum bfin_builtins): Add BFIN_BUILTIN_ONES,
BFIN_BUILTIN_CPLX_MUL_16_S40, BFIN_BUILTIN_CPLX_MAC_16_S40,
BFIN_BUILTIN_CPLX_MSU_16_S40, and BFIN_BUILTIN_CPLX_SQU.
(bfin_init_builtins): Initialize __builtin_bfin_ones,
__builtin_bfin_min_fr1x16, __builtin_bfin_max_fr1x16,
__builtin_bfin_min_fr1x32, __builtin_bfin_max_fr1x32,
__builtin_bfin_cmplx_add, __builtin_bfin_cmplx_sub,
__builtin_bfin_cmplx_mul_s40, __builtin_bfin_cmplx_mac_s40,
__builtin_bfin_cmplx_msu_s40 and __builtin_bfin_csqu_fr16.
(bdesc_1arg): Add __builtin_bfin_ones.
(bfin_expand_builtin): Expand __builtin_bfin_cmplx_mul_s40,
__builtin_bfin_cmplx_mac_s40, __builtin_bfin_cmplx_msu_s40,
and __builtin_bfin_csqu_fr16.
* config/bfin/bfin.md (UNSPEC_ONES): New constant.
(ones): New define_insn.
(ssaddhi3_parts): New define_insn.
(sssubhi3_parts): New define_insn.
(flag_mulhi_parts): New define_insn.
Index: gcc/config/bfin/bfin.c
===================================================================
--- gcc.orig/config/bfin/bfin.c 2007-08-25 00:08:48.000000000 +0800
+++ gcc/config/bfin/bfin.c 2007-08-25 01:10:49.000000000 +0800
@@ -5034,6 +5034,7 @@
{
BFIN_BUILTIN_CSYNC,
BFIN_BUILTIN_SSYNC,
+ BFIN_BUILTIN_ONES,
BFIN_BUILTIN_COMPOSE_2X16,
BFIN_BUILTIN_EXTRACTLO,
BFIN_BUILTIN_EXTRACTHI,
@@ -5090,6 +5091,12 @@
BFIN_BUILTIN_CPLX_MAC_16,
BFIN_BUILTIN_CPLX_MSU_16,
+ BFIN_BUILTIN_CPLX_MUL_16_S40,
+ BFIN_BUILTIN_CPLX_MAC_16_S40,
+ BFIN_BUILTIN_CPLX_MSU_16_S40,
+
+ BFIN_BUILTIN_CPLX_SQU,
+
BFIN_BUILTIN_MAX
};
@@ -5149,6 +5156,8 @@
def_builtin ("__builtin_bfin_csync", void_ftype_void, BFIN_BUILTIN_CSYNC);
def_builtin ("__builtin_bfin_ssync", void_ftype_void, BFIN_BUILTIN_SSYNC);
+ def_builtin ("__builtin_bfin_ones", short_ftype_int, BFIN_BUILTIN_ONES);
+
def_builtin ("__builtin_bfin_compose_2x16", v2hi_ftype_int_int,
BFIN_BUILTIN_COMPOSE_2X16);
def_builtin ("__builtin_bfin_extract_hi", short_ftype_v2hi,
@@ -5178,6 +5187,11 @@
def_builtin ("__builtin_bfin_abs_fr2x16", v2hi_ftype_v2hi,
BFIN_BUILTIN_ABS_2X16);
+ def_builtin ("__builtin_bfin_min_fr1x16", short_ftype_int_int,
+ BFIN_BUILTIN_MIN_1X16);
+ def_builtin ("__builtin_bfin_max_fr1x16", short_ftype_int_int,
+ BFIN_BUILTIN_MAX_1X16);
+
def_builtin ("__builtin_bfin_add_fr1x16", short_ftype_int_int,
BFIN_BUILTIN_SSADD_1X16);
def_builtin ("__builtin_bfin_sub_fr1x16", short_ftype_int_int,
@@ -5209,6 +5223,11 @@
def_builtin ("__builtin_bfin_mulhisihh", int_ftype_v2hi_v2hi,
BFIN_BUILTIN_MULHISIHH);
+ def_builtin ("__builtin_bfin_min_fr1x32", int_ftype_int_int,
+ BFIN_BUILTIN_MIN_1X32);
+ def_builtin ("__builtin_bfin_max_fr1x32", int_ftype_int_int,
+ BFIN_BUILTIN_MAX_1X32);
+
def_builtin ("__builtin_bfin_add_fr1x32", int_ftype_int_int,
BFIN_BUILTIN_SSADD_1X32);
def_builtin ("__builtin_bfin_sub_fr1x32", int_ftype_int_int,
@@ -5241,12 +5260,24 @@
BFIN_BUILTIN_SSASHIFT_1X32);
/* Complex numbers. */
+ def_builtin ("__builtin_bfin_cmplx_add", v2hi_ftype_v2hi_v2hi,
+ BFIN_BUILTIN_SSADD_2X16);
+ def_builtin ("__builtin_bfin_cmplx_sub", v2hi_ftype_v2hi_v2hi,
+ BFIN_BUILTIN_SSSUB_2X16);
def_builtin ("__builtin_bfin_cmplx_mul", v2hi_ftype_v2hi_v2hi,
BFIN_BUILTIN_CPLX_MUL_16);
def_builtin ("__builtin_bfin_cmplx_mac", v2hi_ftype_v2hi_v2hi_v2hi,
BFIN_BUILTIN_CPLX_MAC_16);
def_builtin ("__builtin_bfin_cmplx_msu", v2hi_ftype_v2hi_v2hi_v2hi,
BFIN_BUILTIN_CPLX_MSU_16);
+ def_builtin ("__builtin_bfin_cmplx_mul_s40", v2hi_ftype_v2hi_v2hi,
+ BFIN_BUILTIN_CPLX_MUL_16_S40);
+ def_builtin ("__builtin_bfin_cmplx_mac_s40", v2hi_ftype_v2hi_v2hi_v2hi,
+ BFIN_BUILTIN_CPLX_MAC_16_S40);
+ def_builtin ("__builtin_bfin_cmplx_msu_s40", v2hi_ftype_v2hi_v2hi_v2hi,
+ BFIN_BUILTIN_CPLX_MSU_16_S40);
+ def_builtin ("__builtin_bfin_csqu_fr16", v2hi_ftype_v2hi,
+ BFIN_BUILTIN_CPLX_SQU);
}
@@ -5294,6 +5325,8 @@
static const struct builtin_description bdesc_1arg[] =
{
+ { CODE_FOR_ones, "__builtin_bfin_ones", BFIN_BUILTIN_ONES, 0 },
+
{ CODE_FOR_signbitshi2, "__builtin_bfin_norm_fr1x16", BFIN_BUILTIN_NORM_1X16, 0 },
{ CODE_FOR_ssneghi2, "__builtin_bfin_negate_fr1x16", BFIN_BUILTIN_NEG_1X16, 0 },
{ CODE_FOR_abshi2, "__builtin_bfin_abs_fr1x16", BFIN_BUILTIN_ABS_1X16, 0 },
@@ -5534,6 +5567,7 @@
return target;
case BFIN_BUILTIN_CPLX_MUL_16:
+ case BFIN_BUILTIN_CPLX_MUL_16_S40:
arg0 = CALL_EXPR_ARG (exp, 0);
arg1 = CALL_EXPR_ARG (exp, 1);
op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
@@ -5549,9 +5583,14 @@
if (! register_operand (op1, GET_MODE (op1)))
op1 = copy_to_mode_reg (GET_MODE (op1), op1);
- emit_insn (gen_flag_macinit1v2hi_parts (accvec, op0, op1, const0_rtx,
- const0_rtx, const0_rtx,
- const1_rtx, GEN_INT (MACFLAG_NONE)));
+ if (fcode == BFIN_BUILTIN_CPLX_MUL_16)
+ emit_insn (gen_flag_macinit1v2hi_parts (accvec, op0, op1, const0_rtx,
+ const0_rtx, const0_rtx,
+ const1_rtx, GEN_INT (MACFLAG_W32)));
+ else
+ emit_insn (gen_flag_macinit1v2hi_parts (accvec, op0, op1, const0_rtx,
+ const0_rtx, const0_rtx,
+ const1_rtx, GEN_INT (MACFLAG_NONE)));
emit_insn (gen_flag_macv2hi_parts (target, op0, op1, const1_rtx,
const1_rtx, const1_rtx,
const0_rtx, accvec, const1_rtx, const0_rtx,
@@ -5561,6 +5600,8 @@
case BFIN_BUILTIN_CPLX_MAC_16:
case BFIN_BUILTIN_CPLX_MSU_16:
+ case BFIN_BUILTIN_CPLX_MAC_16_S40:
+ case BFIN_BUILTIN_CPLX_MSU_16_S40:
arg0 = CALL_EXPR_ARG (exp, 0);
arg1 = CALL_EXPR_ARG (exp, 1);
arg2 = CALL_EXPR_ARG (exp, 2);
@@ -5584,13 +5625,30 @@
emit_move_insn (tmp2, gen_lowpart (SImode, op0));
emit_insn (gen_movstricthi_1 (gen_lowpart (HImode, tmp2), const0_rtx));
emit_insn (gen_load_accumulator_pair (accvec, tmp1, tmp2));
- emit_insn (gen_flag_macv2hi_parts_acconly (accvec, op1, op2, const0_rtx,
- const0_rtx, const0_rtx,
- const1_rtx, accvec, const0_rtx,
- const0_rtx,
- GEN_INT (MACFLAG_W32)));
- tmp1 = (fcode == BFIN_BUILTIN_CPLX_MAC_16 ? const1_rtx : const0_rtx);
- tmp2 = (fcode == BFIN_BUILTIN_CPLX_MAC_16 ? const0_rtx : const1_rtx);
+ if (fcode == BFIN_BUILTIN_CPLX_MAC_16
+ || fcode == BFIN_BUILTIN_CPLX_MSU_16)
+ emit_insn (gen_flag_macv2hi_parts_acconly (accvec, op1, op2, const0_rtx,
+ const0_rtx, const0_rtx,
+ const1_rtx, accvec, const0_rtx,
+ const0_rtx,
+ GEN_INT (MACFLAG_W32)));
+ else
+ emit_insn (gen_flag_macv2hi_parts_acconly (accvec, op1, op2, const0_rtx,
+ const0_rtx, const0_rtx,
+ const1_rtx, accvec, const0_rtx,
+ const0_rtx,
+ GEN_INT (MACFLAG_NONE)));
+ if (fcode == BFIN_BUILTIN_CPLX_MAC_16
+ || fcode == BFIN_BUILTIN_CPLX_MAC_16_S40)
+ {
+ tmp1 = const1_rtx;
+ tmp2 = const0_rtx;
+ }
+ else
+ {
+ tmp1 = const0_rtx;
+ tmp2 = const1_rtx;
+ }
emit_insn (gen_flag_macv2hi_parts (target, op1, op2, const1_rtx,
const1_rtx, const1_rtx,
const0_rtx, accvec, tmp1, tmp2,
@@ -5598,6 +5656,35 @@
return target;
+ case BFIN_BUILTIN_CPLX_SQU:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+ accvec = gen_reg_rtx (V2PDImode);
+ icode = CODE_FOR_flag_mulv2hi;
+ tmp1 = gen_reg_rtx (V2HImode);
+ tmp2 = gen_reg_rtx (V2HImode);
+
+ if (! target
+ || GET_MODE (target) != V2HImode
+ || ! (*insn_data[icode].operand[0].predicate) (target, V2HImode))
+ target = gen_reg_rtx (V2HImode);
+ if (! register_operand (op0, GET_MODE (op0)))
+ op0 = copy_to_mode_reg (GET_MODE (op0), op0);
+
+ emit_insn (gen_flag_mulv2hi (tmp1, op0, op0, GEN_INT (MACFLAG_NONE)));
+
+ emit_insn (gen_flag_mulhi_parts (tmp2, op0, op0, const0_rtx,
+ const0_rtx, const1_rtx,
+ GEN_INT (MACFLAG_NONE)));
+
+ emit_insn (gen_ssaddhi3_parts (target, tmp2, tmp2, const1_rtx,
+ const0_rtx, const0_rtx));
+
+ emit_insn (gen_sssubhi3_parts (target, tmp1, tmp1, const0_rtx,
+ const0_rtx, const1_rtx));
+
+ return target;
+
default:
break;
}
Index: gcc/config/bfin/bfin.md
===================================================================
--- gcc.orig/config/bfin/bfin.md 2007-08-25 00:08:48.000000000 +0800
+++ gcc/config/bfin/bfin.md 2007-08-25 01:03:23.000000000 +0800
@@ -137,7 +137,8 @@
(UNSPEC_LSETUP_END 10)
;; Distinguish a 32-bit version of an insn from a 16-bit version.
(UNSPEC_32BIT 11)
- (UNSPEC_NOP 12)])
+ (UNSPEC_NOP 12)
+ (UNSPEC_ONES 13)]) ;; unspec code used by the "ones" insn
(define_constants
[(UNSPEC_VOLATILE_EH_RETURN 0)
@@ -1314,6 +1315,14 @@
%0 = %1 ^ %2;"
[(set_attr "type" "alu0")])
+(define_insn "ones"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (unspec:HI [(match_operand:SI 1 "register_operand" "d")]
+ UNSPEC_ONES))]
+ ""
+ "%h0 = ONES %1;"
+ [(set_attr "type" "alu0")])
+
(define_insn "smaxsi3"
[(set (match_operand:SI 0 "register_operand" "=d")
(smax:SI (match_operand:SI 1 "register_operand" "d")
@@ -2903,6 +2912,60 @@
"%h0 = %h1 + %h2 (S)%!"
[(set_attr "type" "dsp32")])
+(define_insn "ssaddhi3_parts"
+ [(set (vec_select:HI
+ (match_operand:V2HI 0 "register_operand" "d")
+ (parallel [(match_operand 3 "const01_operand" "P0P1")]))
+ (ss_plus:HI (vec_select:HI
+ (match_operand:V2HI 1 "register_operand" "d")
+ (parallel [(match_operand 4 "const01_operand" "P0P1")]))
+ (vec_select:HI
+ (match_operand:V2HI 2 "register_operand" "d")
+ (parallel [(match_operand 5 "const01_operand" "P0P1")]))))]
+ ""
+{
+ const char *templates[] = {
+ "%h0 = %h1 + %h2 (S)%!",
+ "%d0 = %h1 + %h2 (S)%!",
+ "%h0 = %d1 + %h2 (S)%!",
+ "%d0 = %d1 + %h2 (S)%!",
+ "%h0 = %h1 + %d2 (S)%!",
+ "%d0 = %h1 + %d2 (S)%!",
+ "%h0 = %d1 + %d2 (S)%!",
+ "%d0 = %d1 + %d2 (S)%!" };
+ int alt = INTVAL (operands[3]) + (INTVAL (operands[4]) << 1)
+ + (INTVAL (operands[5]) << 2);
+ return templates[alt];
+}
+ [(set_attr "type" "dsp32")])
+
+(define_insn "sssubhi3_parts"
+ [(set (vec_select:HI
+ (match_operand:V2HI 0 "register_operand" "d")
+ (parallel [(match_operand 3 "const01_operand" "P0P1")]))
+ (ss_minus:HI (vec_select:HI
+ (match_operand:V2HI 1 "register_operand" "d")
+ (parallel [(match_operand 4 "const01_operand" "P0P1")]))
+ (vec_select:HI
+ (match_operand:V2HI 2 "register_operand" "d")
+ (parallel [(match_operand 5 "const01_operand" "P0P1")]))))]
+ ""
+{
+ const char *templates[] = {
+ "%h0 = %h1 - %h2 (S)%!",
+ "%d0 = %h1 - %h2 (S)%!",
+ "%h0 = %d1 - %h2 (S)%!",
+ "%d0 = %d1 - %h2 (S)%!",
+ "%h0 = %h1 - %d2 (S)%!",
+ "%d0 = %h1 - %d2 (S)%!",
+ "%h0 = %d1 - %d2 (S)%!",
+ "%d0 = %d1 - %d2 (S)%!" };
+ int alt = INTVAL (operands[3]) + (INTVAL (operands[4]) << 1)
+ + (INTVAL (operands[5]) << 2);
+ return templates[alt];
+}
+ [(set_attr "type" "dsp32")])
+
(define_insn "sssubhi3"
[(set (match_operand:HI 0 "register_operand" "=d")
(ss_minus:HI (match_operand:HI 1 "register_operand" "d")
@@ -3117,6 +3180,35 @@
"%h0 = %h1 * %h2 %M3%!"
[(set_attr "type" "dsp32")])
+(define_insn "flag_mulhi_parts"
+ [(set (vec_select:HI
+ (match_operand:V2HI 0 "register_operand" "d")
+ (parallel [(match_operand 3 "const01_operand" "P0P1")]))
+ (unspec:HI [(vec_select:HI
+ (match_operand:V2HI 1 "register_operand" "d")
+ (parallel [(match_operand 4 "const01_operand" "P0P1")]))
+ (vec_select:HI
+ (match_operand:V2HI 2 "register_operand" "d")
+ (parallel [(match_operand 5 "const01_operand" "P0P1")]))
+ (match_operand 6 "const_int_operand" "n")]
+ UNSPEC_MUL_WITH_FLAG))]
+ ""
+{
+ const char *templates[] = {
+ "%h0 = %h1 * %h2 %M6%!",
+ "%d0 = %h1 * %h2 %M6%!",
+ "%h0 = %d1 * %h2 %M6%!",
+ "%d0 = %d1 * %h2 %M6%!",
+ "%h0 = %h1 * %d2 %M6%!",
+ "%d0 = %h1 * %d2 %M6%!",
+ "%h0 = %d1 * %d2 %M6%!",
+ "%d0 = %d1 * %d2 %M6%!" };
+ int alt = INTVAL (operands[3]) + (INTVAL (operands[4]) << 1)
+ + (INTVAL (operands[5]) << 2);
+ return templates[alt];
+}
+ [(set_attr "type" "dsp32")])
+
(define_insn "flag_mulhisi"
[(set (match_operand:SI 0 "register_operand" "=d")
(unspec:SI [(match_operand:HI 1 "register_operand" "d")