[AArch64] Enable generation of FRINTNZ instructions
Andre Vieira (lists)
andre.simoesdiasvieira@arm.com
Thu Nov 25 13:53:08 GMT 2021
On 22/11/2021 11:41, Richard Biener wrote:
>
>> On 18/11/2021 11:05, Richard Biener wrote:
>>> This is a good shout and made me think about something I hadn't before... I
>>> thought I could handle the vector forms later, but the problem is if I add
>>> support for the scalar, it will stop the vectorizer. It seems
>>> vectorizable_call expects all arguments to have the same type, which doesn't
>>> work with the work-around of passing the integer type as an operand.
> We already special case some IFNs there (masked load/store and gather)
> to ignore some args, so that would just add to this set.
>
> Richard.
Hi,
Reworked it to add support for the new IFN to the vectorizer. I was
initially trying to make vectorizable_call and
vectorizable_internal_function handle IFNs with different input types
more generically, using the information we have in the <IFN>_direct
structs about which operands to get the modes from. Unfortunately, that
wasn't straightforward, because vectorizable_call assumes operands have
the same type and uses the type of the DEF_STMT_INFO of the non-constant
operands (either the output operand or the non-constant inputs) to
determine the type of constants. I assume there is some reason why we
use the DEF_STMT_INFO rather than always using
get_vectype_for_scalar_type on the argument types. That is why I ended
up with this sort of half-way mix of both, which still leaves room to
add more IFNs that don't take inputs of the same type, at the cost of a
bit of special-casing similar to the IFN_FTRUNC_INT and masking ones.
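
To illustrate (a sketch of mine, not part of the patch): for the scalar case

  float
  f (float x)
  {
    int y = x;
    return (float) y;
  }

the new match.pd rule replaces the float (fix_trunc (x)) round trip with a
call of the form

  _2 = .FTRUNC_INT (x_1, 2147483647);

where the second argument is a dummy constant whose type (here int) carries
the intermediate signed integer type; its value (the type's maximum) is
otherwise ignored by the expander.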
Bootstrapped on aarch64-none-linux.
OK for trunk?
gcc/ChangeLog:
* config/aarch64/aarch64.md (ftrunc<mode><frintnz_mode>2): New
pattern.
* config/aarch64/iterators.md (FRINTNZ): New iterator.
(frintnz_mode): New int attribute.
(VSFDF): Make iterator conditional.
* internal-fn.def (FTRUNC_INT): New IFN.
* internal-fn.c (ftrunc_int_direct): New define.
(expand_ftrunc_int_optab_fn): New custom expander.
(direct_ftrunc_int_optab_supported_p): New supported_p.
* match.pd: Add to the existing TRUNC pattern match.
* optabs.def (ftrunc_int): New entry.
* stor-layout.h (element_precision): Moved from here...
* tree.h (element_precision): ... to here.
(element_type): New declaration.
* tree.c (element_type): New function.
(element_precision): Changed to use element_type.
* tree-vect-stmts.c (vectorizable_internal_function): Add
support for IFNs with different input types.
(vectorizable_call): Teach to handle IFN_FTRUNC_INT.
* doc/md.texi: New entry for ftrunc pattern name.
* doc/sourcebuild.texi (aarch64_frintnzx_ok): New target.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/merge_trunc1.c: Adapted to skip if frintNz
instructions are available.
* lib/target-supports.exp: Added aarch64_frintnzx_ok target.
* gcc.target/aarch64/frintnz.c: New test.
* gcc.target/aarch64/frintnz_vec.c: New test.
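
For reference, a rough before/after of the scalar case (f1 from the new
frintnz.c test below; assembly hand-abbreviated, so take it as a sketch):

  f1:                     // without Armv8.5-A FRINT
	fcvtzs	w0, s0
	scvtf	s0, w0
	ret

  f1:                     // with this patch and -march=armv8.5-a
	frint32z	s0, s0
	ret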
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 4035e061706793849c68ae09bcb2e4b9580ab7b6..c5c60e7a810e22b0ea9ed6bf056ddd6431d60269 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -7345,12 +7345,18 @@ (define_insn "despeculate_simpleti"
(set_attr "speculation_barrier" "true")]
)
+(define_expand "ftrunc<mode><frintnz_mode>2"
+ [(set (match_operand:VSFDF 0 "register_operand" "=w")
+ (unspec:VSFDF [(match_operand:VSFDF 1 "register_operand" "w")]
+ FRINTNZ))]
+ "TARGET_FRINT"
+)
+
(define_insn "aarch64_<frintnzs_op><mode>"
[(set (match_operand:VSFDF 0 "register_operand" "=w")
(unspec:VSFDF [(match_operand:VSFDF 1 "register_operand" "w")]
FRINTNZX))]
- "TARGET_FRINT && TARGET_FLOAT
- && !(VECTOR_MODE_P (<MODE>mode) && !TARGET_SIMD)"
+ "TARGET_FRINT"
"<frintnzs_op>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
[(set_attr "type" "f_rint<stype>")]
)
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index bdc8ba3576cf2c9b4ae96b45a382234e4e25b13f..51f00344b02d0d1d4adf97463f6a46f9fd0fb43f 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -160,7 +160,11 @@ (define_mode_iterator VHSDF_HSDF [(V4HF "TARGET_SIMD_F16INST")
SF DF])
;; Scalar and vetor modes for SF, DF.
-(define_mode_iterator VSFDF [V2SF V4SF V2DF DF SF])
+(define_mode_iterator VSFDF [(V2SF "TARGET_SIMD")
+ (V4SF "TARGET_SIMD")
+ (V2DF "TARGET_SIMD")
+ (DF "TARGET_FLOAT")
+ (SF "TARGET_FLOAT")])
;; Advanced SIMD single Float modes.
(define_mode_iterator VDQSF [V2SF V4SF])
@@ -3067,6 +3071,8 @@ (define_int_iterator FCMLA [UNSPEC_FCMLA
(define_int_iterator FRINTNZX [UNSPEC_FRINT32Z UNSPEC_FRINT32X
UNSPEC_FRINT64Z UNSPEC_FRINT64X])
+(define_int_iterator FRINTNZ [UNSPEC_FRINT32Z UNSPEC_FRINT64Z])
+
(define_int_iterator SVE_BRK_UNARY [UNSPEC_BRKA UNSPEC_BRKB])
(define_int_iterator SVE_BRK_BINARY [UNSPEC_BRKN UNSPEC_BRKPA UNSPEC_BRKPB])
@@ -3482,6 +3488,8 @@ (define_int_attr f16mac1 [(UNSPEC_FMLAL "a") (UNSPEC_FMLSL "s")
(define_int_attr frintnzs_op [(UNSPEC_FRINT32Z "frint32z") (UNSPEC_FRINT32X "frint32x")
(UNSPEC_FRINT64Z "frint64z") (UNSPEC_FRINT64X "frint64x")])
+(define_int_attr frintnz_mode [(UNSPEC_FRINT32Z "si") (UNSPEC_FRINT64Z "di")])
+
;; The condition associated with an UNSPEC_COND_<xx>.
(define_int_attr cmp_op [(UNSPEC_COND_CMPEQ_WIDE "eq")
(UNSPEC_COND_CMPGE_WIDE "ge")
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 41f1850bf6e95005647ca97a495a97d7e184d137..d50d09b0ae60d98537b9aece4396a490f33f174c 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -6175,6 +6175,15 @@ operands; otherwise, it may not.
This pattern is not allowed to @code{FAIL}.
+@cindex @code{ftrunc@var{m}@var{n}2} instruction pattern
+@item @samp{ftrunc@var{m}@var{n}2}
+Truncate operand 1 towards zero to an @var{n}-mode signed integer and store
+the result in operand 0.  Both operands have mode @var{m}, which is a scalar
+or vector floating-point mode.  An exception must be raised if operand 1 does
+not fit in an @var{n}-mode signed integer, just as it would be if the
+truncation happened through a separate floating-point to integer conversion.
+
+
@cindex @code{round@var{m}2} instruction pattern
@item @samp{round@var{m}2}
Round operand 1 to the nearest integer, rounding away from zero in the
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index 40b1e0d816789b225089c4143fb63e62a6af817a..15d4de24d15cce6793b3bb61d728e61cea00924d 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -2282,6 +2282,10 @@ Like @code{aarch64_sve_hw}, but also test for an exact hardware vector length.
@item aarch64_fjcvtzs_hw
AArch64 target that is able to generate and execute armv8.3-a FJCVTZS
instruction.
+
+@item aarch64_frintnzx_ok
+AArch64 target that is able to generate the Armv8.5-a FRINT32Z, FRINT64Z,
+FRINT32X and FRINT64X instructions.
@end table
@subsubsection MIPS-specific attributes
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 0cba95411a63423484dda5b1251f47de24e926ba..60b404ef44360c8ae0cda1176fb888302ddbc98d 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -130,6 +130,7 @@ init_internal_fns ()
#define fold_left_direct { 1, 1, false }
#define mask_fold_left_direct { 1, 1, false }
#define check_ptrs_direct { 0, 0, false }
+#define ftrunc_int_direct { 0, 1, true }
const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = {
#define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) not_direct,
@@ -156,6 +157,29 @@ get_multi_vector_move (tree array_type, convert_optab optab)
return convert_optab_handler (optab, imode, vmode);
}
+/* Expand FTRUNC_INT call STMT using optab OPTAB. */
+
+static void
+expand_ftrunc_int_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
+{
+ class expand_operand ops[2];
+ tree lhs, float_type, int_type;
+ rtx target, op;
+
+ lhs = gimple_call_lhs (stmt);
+ target = expand_normal (lhs);
+ op = expand_normal (gimple_call_arg (stmt, 0));
+
+ float_type = TREE_TYPE (lhs);
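+  /* The second call argument is a dummy constant: only its type matters,
+     as it carries the intermediate integer type to truncate through.  */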
+ int_type = element_type (gimple_call_arg (stmt, 1));
+
+ create_output_operand (&ops[0], target, TYPE_MODE (float_type));
+ create_input_operand (&ops[1], op, TYPE_MODE (float_type));
+
+ expand_insn (convert_optab_handler (optab, TYPE_MODE (float_type),
+ TYPE_MODE (int_type)), 2, ops);
+}
+
/* Expand LOAD_LANES call STMT using optab OPTAB. */
static void
@@ -3688,6 +3712,15 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
!= CODE_FOR_nothing);
}
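+/* Return true if OPTAB supports a conversion from the mode of TYPES.first
+   to the mode of the element type of TYPES.second for OPT_TYPE.  */
+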
+static bool
+direct_ftrunc_int_optab_supported_p (convert_optab optab, tree_pair types,
+                                     optimization_type opt_type)
+{
+ return (convert_optab_handler (optab, TYPE_MODE (types.first),
+ TYPE_MODE (element_type (types.second)),
+ opt_type) != CODE_FOR_nothing);
+}
+
#define direct_unary_optab_supported_p direct_optab_supported_p
#define direct_binary_optab_supported_p direct_optab_supported_p
#define direct_ternary_optab_supported_p direct_optab_supported_p
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index bb13c6cce1bf55633760bc14980402f1f0ac1689..e58891e3d3ebc805dd55ac6f70bbda617b7302b7 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -66,6 +66,9 @@ along with GCC; see the file COPYING3. If not see
- fold_left: for scalar = FN (scalar, vector), keyed off the vector mode
- check_ptrs: used for check_{raw,war}_ptrs
+ - ftrunc_int: a unary conversion optab that takes and returns values of the
+ same mode, but internally converts via another mode. This second mode is
+ specified using a dummy final function argument.
DEF_INTERNAL_SIGNED_OPTAB_FN defines an internal function that
maps to one of two optabs, depending on the signedness of an input.
@@ -269,6 +272,7 @@ DEF_INTERNAL_FLT_FLOATN_FN (RINT, ECF_CONST, rint, unary)
DEF_INTERNAL_FLT_FLOATN_FN (ROUND, ECF_CONST, round, unary)
DEF_INTERNAL_FLT_FLOATN_FN (ROUNDEVEN, ECF_CONST, roundeven, unary)
DEF_INTERNAL_FLT_FLOATN_FN (TRUNC, ECF_CONST, btrunc, unary)
+DEF_INTERNAL_OPTAB_FN (FTRUNC_INT, ECF_CONST, ftruncint, ftrunc_int)
/* Binary math functions. */
DEF_INTERNAL_FLT_FN (ATAN2, ECF_CONST, atan2, binary)
diff --git a/gcc/match.pd b/gcc/match.pd
index a319aefa8081ac177981ad425c461f8a771128f4..80660e6fd40bc6934e1fa0329c0fbcab1658ed44 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3713,12 +3713,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
trapping behaviour, so require !flag_trapping_math. */
#if GIMPLE
(simplify
- (float (fix_trunc @0))
- (if (!flag_trapping_math
- && types_match (type, TREE_TYPE (@0))
- && direct_internal_fn_supported_p (IFN_TRUNC, type,
- OPTIMIZE_FOR_BOTH))
- (IFN_TRUNC @0)))
+ (float (fix_trunc@1 @0))
+ (if (types_match (type, TREE_TYPE (@0)))
+ (with {
+ tree int_type = element_type (@1);
+ }
+ (if (TYPE_SIGN (TREE_TYPE (@1)) == SIGNED
+ && direct_internal_fn_supported_p (IFN_FTRUNC_INT, type, int_type,
+ OPTIMIZE_FOR_BOTH))
+ (IFN_FTRUNC_INT @0 {
+ wide_int_to_tree (int_type, wi::max_value (TYPE_PRECISION (int_type),
+ SIGNED)); })
+ (if (!flag_trapping_math
+ && direct_internal_fn_supported_p (IFN_TRUNC, type,
+ OPTIMIZE_FOR_BOTH))
+ (IFN_TRUNC @0))))))
#endif
/* If we have a narrowing conversion to an integral type that is fed by a
diff --git a/gcc/optabs.def b/gcc/optabs.def
index b889ad2e5a08613db51d16d072080ac6cb48404f..57d259d33409265df3af1646d123e4ab216c34c8 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -63,6 +63,7 @@ OPTAB_CX(fractuns_optab, "fractuns$Q$b$I$a2")
OPTAB_CL(satfract_optab, "satfract$b$Q$a2", SAT_FRACT, "satfract", gen_satfract_conv_libfunc)
OPTAB_CL(satfractuns_optab, "satfractuns$I$b$Q$a2", UNSIGNED_SAT_FRACT, "satfractuns", gen_satfractuns_conv_libfunc)
+OPTAB_CD(ftruncint_optab, "ftrunc$a$b2")
OPTAB_CD(sfixtrunc_optab, "fix_trunc$F$b$I$a2")
OPTAB_CD(ufixtrunc_optab, "fixuns_trunc$F$b$I$a2")
diff --git a/gcc/stor-layout.h b/gcc/stor-layout.h
index 9e892e50c8559e497fcae1b77a36401df82fabe2..165a592d4d2c7bf525060dd51ce6094eb4f4f68a 100644
--- a/gcc/stor-layout.h
+++ b/gcc/stor-layout.h
@@ -36,7 +36,6 @@ extern void place_field (record_layout_info, tree);
extern void compute_record_mode (tree);
extern void finish_bitfield_layout (tree);
extern void finish_record_layout (record_layout_info, int);
-extern unsigned int element_precision (const_tree);
extern void finalize_size_functions (void);
extern void fixup_unsigned_type (tree);
extern void initialize_sizetypes (void);
diff --git a/gcc/testsuite/gcc.target/aarch64/frintnz.c b/gcc/testsuite/gcc.target/aarch64/frintnz.c
new file mode 100644
index 0000000000000000000000000000000000000000..008e1cf9f4a1b0148128c65c9ea0d1bb111467b7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/frintnz.c
@@ -0,0 +1,91 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8.5-a" } */
+/* { dg-require-effective-target aarch64_frintnzx_ok } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/*
+** f1:
+** frint32z s0, s0
+** ret
+*/
+float
+f1 (float x)
+{
+ int y = x;
+ return (float) y;
+}
+
+/*
+** f2:
+** frint64z s0, s0
+** ret
+*/
+float
+f2 (float x)
+{
+ long long int y = x;
+ return (float) y;
+}
+
+/*
+** f3:
+** frint32z d0, d0
+** ret
+*/
+double
+f3 (double x)
+{
+ int y = x;
+ return (double) y;
+}
+
+/*
+** f4:
+** frint64z d0, d0
+** ret
+*/
+double
+f4 (double x)
+{
+ long long int y = x;
+ return (double) y;
+}
+
+float
+f1_dont (float x)
+{
+ unsigned int y = x;
+ return (float) y;
+}
+
+float
+f2_dont (float x)
+{
+ unsigned long long int y = x;
+ return (float) y;
+}
+
+double
+f3_dont (double x)
+{
+ unsigned int y = x;
+ return (double) y;
+}
+
+double
+f4_dont (double x)
+{
+ unsigned long long int y = x;
+ return (double) y;
+}
+
+double
+f5_dont (double x)
+{
+ signed short y = x;
+ return (double) y;
+}
+
+/* Make sure the 'dont's don't generate any frintNz. */
+/* { dg-final { scan-assembler-times {frint32z} 2 } } */
+/* { dg-final { scan-assembler-times {frint64z} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/frintnz_vec.c b/gcc/testsuite/gcc.target/aarch64/frintnz_vec.c
new file mode 100644
index 0000000000000000000000000000000000000000..b93304eb2acb3d3d954eebee51d77ff23fee68ac
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/frintnz_vec.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8.5-a" } */
+/* { dg-require-effective-target aarch64_frintnzx_ok } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define TEST(name,float_type,int_type) \
+void \
+name (float_type * __restrict__ x, float_type * __restrict__ y, int n) \
+{ \
+ for (int i = 0; i < n; ++i) \
+ { \
+ int_type x_i = x[i]; \
+ y[i] = (float_type) x_i; \
+ } \
+}
+
+/*
+** f1:
+** ...
+** frint32z v0.4s, v0.4s
+** ...
+*/
+TEST(f1, float, int)
+
+/*
+** f2:
+** ...
+** frint64z v0.4s, v0.4s
+** ...
+*/
+TEST(f2, float, long long)
+
+/*
+** f3:
+** ...
+** frint32z v0.2d, v0.2d
+** ...
+*/
+TEST(f3, double, int)
+
+/*
+** f4:
+** ...
+** frint64z v0.2d, v0.2d
+** ...
+*/
+TEST(f4, double, long long)
diff --git a/gcc/testsuite/gcc.target/aarch64/merge_trunc1.c b/gcc/testsuite/gcc.target/aarch64/merge_trunc1.c
index 07217064e2ba54fcf4f5edc440e6ec19ddae66e1..3d80871c4cebd5fb5cac0714b3feee27038f05fd 100644
--- a/gcc/testsuite/gcc.target/aarch64/merge_trunc1.c
+++ b/gcc/testsuite/gcc.target/aarch64/merge_trunc1.c
@@ -1,5 +1,6 @@
/* { dg-do compile } */
/* { dg-options "-O2 -ffast-math" } */
+/* { dg-skip-if "" { aarch64_frintnzx_ok } } */
float
f1 (float x)
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 8cbda192fe0fae59ea208ee43696b4d22c43e61e..450ca78230faeba40b89fc7987af27b6bf0a0d53 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -11365,6 +11365,32 @@ proc check_effective_target_arm_v8_3a_bkey_directive { } {
}]
}
+# Return 1 if the target supports Armv8.5-A scalar and Advanced SIMD
+# FRINT32[ZX] and FRINT64[ZX] instructions, 0 otherwise.  The test is valid for
+# AArch64.
+proc check_effective_target_aarch64_frintnzx_ok_nocache { } {
+
+ if { ![istarget aarch64*-*-*] } {
+ return 0;
+ }
+
+ if { [check_no_compiler_messages_nocache \
+ aarch64_frintnzx_ok assembly {
+ #if !defined (__ARM_FEATURE_FRINT)
+ #error "__ARM_FEATURE_FRINT not defined"
+ #endif
+ } [current_compiler_flags]] } {
+ return 1;
+ }
+
+ return 0;
+}
+
+proc check_effective_target_aarch64_frintnzx_ok { } {
+ return [check_cached_effective_target aarch64_frintnzx_ok \
+ check_effective_target_aarch64_frintnzx_ok_nocache]
+}
+
# Return 1 if the target supports executing the Armv8.1-M Mainline Low
# Overhead Loop, 0 otherwise. The test is valid for ARM.
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 03cc7267cf80d4ce73c0d89ab86b07e84752456a..35bb1f70f7b173ad0d1e9f70ce0ac9da891dbe62 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -1625,7 +1625,8 @@ vect_finish_stmt_generation (vec_info *vinfo,
static internal_fn
vectorizable_internal_function (combined_fn cfn, tree fndecl,
- tree vectype_out, tree vectype_in)
+ tree vectype_out, tree vectype_in,
+ tree *vectypes)
{
internal_fn ifn;
if (internal_fn_p (cfn))
@@ -1637,8 +1638,12 @@ vectorizable_internal_function (combined_fn cfn, tree fndecl,
const direct_internal_fn_info &info = direct_internal_fn (ifn);
if (info.vectorizable)
{
- tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
- tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
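+      /* Use the vector type recorded for each operand when one was
+         determined; otherwise fall back to the common input vector type.  */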
+ tree type0 = (info.type0 < 0 ? vectype_out : vectypes[info.type0]);
+ if (!type0)
+ type0 = vectype_in;
+ tree type1 = (info.type1 < 0 ? vectype_out : vectypes[info.type1]);
+ if (!type1)
+ type1 = vectype_in;
if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
OPTIMIZE_FOR_SPEED))
return ifn;
@@ -3252,16 +3257,31 @@ vectorizable_call (vec_info *vinfo,
rhs_type = unsigned_type_node;
}
- int mask_opno = -1;
+ /* The argument that is not of the same type as the others. */
+ int diff_opno = -1;
+ bool masked = false;
if (internal_fn_p (cfn))
- mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
+ {
+ if (cfn == CFN_FTRUNC_INT)
+ /* For FTRUNC this represents the argument that carries the type of the
+ intermediate signed integer. */
+ diff_opno = 1;
+ else
+ {
+ /* For masked operations this represents the argument that carries the
+ mask. */
+ diff_opno = internal_fn_mask_index (as_internal_fn (cfn));
+ masked = diff_opno >= 0;
+ }
+ }
for (i = 0; i < nargs; i++)
{
- if ((int) i == mask_opno)
+ if ((int) i == diff_opno && masked)
{
- if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
- &op, &slp_op[i], &dt[i], &vectypes[i]))
+ if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node,
+ diff_opno, &op, &slp_op[i], &dt[i],
+ &vectypes[i]))
return false;
continue;
}
@@ -3275,27 +3295,35 @@ vectorizable_call (vec_info *vinfo,
return false;
}
- /* We can only handle calls with arguments of the same type. */
- if (rhs_type
- && !types_compatible_p (rhs_type, TREE_TYPE (op)))
+ if ((int) i != diff_opno)
{
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "argument types differ.\n");
- return false;
- }
- if (!rhs_type)
- rhs_type = TREE_TYPE (op);
+ /* We can only handle calls with arguments of the same type. */
+ if (rhs_type
+ && !types_compatible_p (rhs_type, TREE_TYPE (op)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "argument types differ.\n");
+ return false;
+ }
+ if (!rhs_type)
+ rhs_type = TREE_TYPE (op);
- if (!vectype_in)
- vectype_in = vectypes[i];
- else if (vectypes[i]
- && !types_compatible_p (vectypes[i], vectype_in))
+ if (!vectype_in)
+ vectype_in = vectypes[i];
+ else if (vectypes[i]
+ && !types_compatible_p (vectypes[i], vectype_in))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "argument vector types differ.\n");
+ return false;
+ }
+ }
+ else
{
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "argument vector types differ.\n");
- return false;
+ vectypes[i] = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op),
+ slp_node);
}
}
/* If all arguments are external or constant defs, infer the vector type
@@ -3371,8 +3399,8 @@ vectorizable_call (vec_info *vinfo,
|| (modifier == NARROW
&& simple_integer_narrowing (vectype_out, vectype_in,
&convert_code))))
- ifn = vectorizable_internal_function (cfn, callee, vectype_out,
- vectype_in);
+ ifn = vectorizable_internal_function (cfn, callee, vectype_out, vectype_in,
+ &vectypes[0]);
/* If that fails, try asking for a target-specific built-in function. */
if (ifn == IFN_LAST)
@@ -3446,12 +3474,12 @@ vectorizable_call (vec_info *vinfo,
record_stmt_cost (cost_vec, ncopies / 2,
vec_promote_demote, stmt_info, 0, vect_body);
- if (loop_vinfo && mask_opno >= 0)
+ if (loop_vinfo && masked)
{
unsigned int nvectors = (slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
: ncopies);
- tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
+ tree scalar_mask = gimple_call_arg (stmt_info->stmt, diff_opno);
vect_record_loop_mask (loop_vinfo, masks, nvectors,
vectype_out, scalar_mask);
}
@@ -3499,7 +3527,7 @@ vectorizable_call (vec_info *vinfo,
{
/* We don't define any narrowing conditional functions
at present. */
- gcc_assert (mask_opno < 0);
+ gcc_assert (!masked);
tree half_res = make_ssa_name (vectype_in);
gcall *call
= gimple_build_call_internal_vec (ifn, vargs);
@@ -3519,15 +3547,15 @@ vectorizable_call (vec_info *vinfo,
}
else
{
- if (mask_opno >= 0 && masked_loop_p)
+ if (masked && masked_loop_p)
{
unsigned int vec_num = vec_oprnds0.length ();
/* Always true for SLP. */
gcc_assert (ncopies == 1);
tree mask = vect_get_loop_mask (gsi, masks, vec_num,
vectype_out, i);
- vargs[mask_opno] = prepare_load_store_mask
- (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
+ vargs[diff_opno] = prepare_load_store_mask
+ (TREE_TYPE (mask), mask, vargs[diff_opno], gsi);
}
gcall *call;
@@ -3559,13 +3587,13 @@ vectorizable_call (vec_info *vinfo,
orig_vargs[i] = vargs[i] = vec_defs[i][j];
}
- if (mask_opno >= 0 && masked_loop_p)
+ if (masked && masked_loop_p)
{
tree mask = vect_get_loop_mask (gsi, masks, ncopies,
vectype_out, j);
- vargs[mask_opno]
+ vargs[diff_opno]
= prepare_load_store_mask (TREE_TYPE (mask), mask,
- vargs[mask_opno], gsi);
+ vargs[diff_opno], gsi);
}
gimple *new_stmt;
@@ -3584,7 +3612,7 @@ vectorizable_call (vec_info *vinfo,
{
/* We don't define any narrowing conditional functions at
present. */
- gcc_assert (mask_opno < 0);
+ gcc_assert (!masked);
tree half_res = make_ssa_name (vectype_in);
gcall *call = gimple_build_call_internal_vec (ifn, vargs);
gimple_call_set_lhs (call, half_res);
@@ -3628,7 +3656,7 @@ vectorizable_call (vec_info *vinfo,
{
auto_vec<vec<tree> > vec_defs (nargs);
/* We don't define any narrowing conditional functions at present. */
- gcc_assert (mask_opno < 0);
+ gcc_assert (!masked);
for (j = 0; j < ncopies; ++j)
{
/* Build argument list for the vectorized call. */
diff --git a/gcc/tree.h b/gcc/tree.h
index f62c00bc8707029db52e2f3fe529948755235d3d..31ce45a84cc267ea2022c8ca6323368fbe15eb8b 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -6547,4 +6547,12 @@ extern unsigned fndecl_dealloc_argno (tree);
object or pointer. Otherwise return null. */
extern tree get_attr_nonstring_decl (tree, tree * = NULL);
+/* Return the type, or for a complex or vector type the type of its
+ elements. */
+extern tree element_type (const_tree);
+
+/* Return the precision of the type, or for a complex or vector type the
+ precision of the type of its elements. */
+extern unsigned int element_precision (const_tree);
+
#endif /* GCC_TREE_H */
diff --git a/gcc/tree.c b/gcc/tree.c
index 845228a055b2cfac0c9ca8c0cda1b9df4b0095c6..f1e9a1eb48769cb11aa69730e2480ed5522f78c1 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -6645,11 +6645,11 @@ valid_constant_size_p (const_tree size, cst_size_error *perr /* = NULL */)
return true;
}
-/* Return the precision of the type, or for a complex or vector type the
- precision of the type of its elements. */
+/* Return the type, or for a complex or vector type the type of its
+ elements. */
-unsigned int
-element_precision (const_tree type)
+tree
+element_type (const_tree type)
{
if (!TYPE_P (type))
type = TREE_TYPE (type);
@@ -6657,7 +6657,16 @@ element_precision (const_tree type)
if (code == COMPLEX_TYPE || code == VECTOR_TYPE)
type = TREE_TYPE (type);
- return TYPE_PRECISION (type);
+ return CONST_CAST_TREE (type);
+}
+
+/* Return the precision of the type, or for a complex or vector type the
+ precision of the type of its elements. */
+
+unsigned int
+element_precision (const_tree type)
+{
+ return TYPE_PRECISION (element_type (type));
}
/* Return true if CODE represents an associative tree code. Otherwise