This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Optimize in RTL vector AND { -1, -1, ... }, IOR { -1, -1, ... } and XOR { -1, -1, ... }
- From: Jakub Jelinek <jakub at redhat dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Thu, 22 Sep 2011 23:16:28 +0200
- Subject: [PATCH] Optimize in RTL vector AND { -1, -1, ... }, IOR { -1, -1, ... } and XOR { -1, -1, ... }
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
Hi!
The PR50482 testcase shows that we don't optimize logical vector ops
with all ones vectors very well.
The following patch implements that optimization; it has been bootstrapped/regtested
on x86_64-linux and i686-linux. OK for trunk?
2011-09-22 Jakub Jelinek <jakub@redhat.com>
* rtlanal.c (all_ones_cst): New function.
* rtl.h (all_ones_cst): New prototype.
* simplify-rtx.c (simplify_binary_operation_1) <case IOR, XOR, AND>:
Optimize if one operand is all_ones_cst.
* config/i386/i386.c (ix86_expand_sse_movcc): Optimize mask ? -1 : x
into mask | x.
--- gcc/rtlanal.c.jj 2011-08-26 18:41:44.000000000 +0200
+++ gcc/rtlanal.c 2011-09-22 18:43:21.000000000 +0200
@@ -5209,3 +5209,24 @@ low_bitmask_len (enum machine_mode mode,
return exact_log2 (m + 1);
}
+
+bool
+all_ones_cst (const_rtx x)
+{
+ /* True iff X is constm1_rtx or a CONST_VECTOR of only constm1_rtx.  */
+ if (x == constm1_rtx)
+ return true;
+
+ if (GET_CODE (x) == CONST_VECTOR
+ && CONST_VECTOR_NUNITS (x) == GET_MODE_NUNITS (GET_MODE (x)))
+ {
+ int i, units = CONST_VECTOR_NUNITS (x);
+
+ for (i = 0; i < units; i++)
+ if (CONST_VECTOR_ELT (x, i) != constm1_rtx)
+ return false;
+ return true;
+ }
+
+ return false; /* Not constm1_rtx and not a CONST_VECTOR passing the checks above.  */
+}
--- gcc/rtl.h.jj 2011-08-28 12:36:57.000000000 +0200
+++ gcc/rtl.h 2011-09-22 18:43:41.000000000 +0200
@@ -1218,6 +1218,7 @@ extern unsigned int num_sign_bit_copies
extern bool constant_pool_constant_p (rtx);
extern bool truncated_to_mode (enum machine_mode, const_rtx);
extern int low_bitmask_len (enum machine_mode, unsigned HOST_WIDE_INT);
+extern bool all_ones_cst (const_rtx);
#ifndef GENERATOR_FILE
/* Return the cost of SET X. SPEED_P is true if optimizing for speed
--- gcc/simplify-rtx.c.jj 2011-09-15 12:18:54.000000000 +0200
+++ gcc/simplify-rtx.c 2011-09-22 18:48:39.000000000 +0200
@@ -2431,9 +2431,10 @@ simplify_binary_operation_1 (enum rtx_co
case IOR:
if (trueop1 == CONST0_RTX (mode))
return op0;
- if (CONST_INT_P (trueop1)
- && ((UINTVAL (trueop1) & GET_MODE_MASK (mode))
- == GET_MODE_MASK (mode)))
+ if ((CONST_INT_P (trueop1)
+ && ((UINTVAL (trueop1) & GET_MODE_MASK (mode))
+ == GET_MODE_MASK (mode)))
+ || all_ones_cst (trueop1))
return op1;
if (rtx_equal_p (trueop0, trueop1) && ! side_effects_p (op0))
return op0;
@@ -2573,9 +2574,10 @@ simplify_binary_operation_1 (enum rtx_co
case XOR:
if (trueop1 == CONST0_RTX (mode))
return op0;
- if (CONST_INT_P (trueop1)
- && ((UINTVAL (trueop1) & GET_MODE_MASK (mode))
- == GET_MODE_MASK (mode)))
+ if ((CONST_INT_P (trueop1)
+ && ((UINTVAL (trueop1) & GET_MODE_MASK (mode))
+ == GET_MODE_MASK (mode)))
+ || all_ones_cst (trueop1))
return simplify_gen_unary (NOT, mode, op0, mode);
if (rtx_equal_p (trueop0, trueop1)
&& ! side_effects_p (op0)
@@ -2721,6 +2723,8 @@ simplify_binary_operation_1 (enum rtx_co
case AND:
if (trueop1 == CONST0_RTX (mode) && ! side_effects_p (op0))
return trueop1;
+ if (all_ones_cst (trueop1))
+ return op0;
if (HWI_COMPUTABLE_MODE_P (mode))
{
HOST_WIDE_INT nzop0 = nonzero_bits (trueop0, mode);
--- gcc/config/i386/i386.c.jj 2011-09-22 18:37:00.000000000 +0200
+++ gcc/config/i386/i386.c 2011-09-22 18:55:45.000000000 +0200
@@ -18899,6 +18899,12 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp
x = gen_rtx_AND (mode, x, op_false);
emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}
+ else if (all_ones_cst (op_true))
+ {
+ op_false = force_reg (mode, op_false);
+ x = gen_rtx_IOR (mode, cmp, op_false);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+ }
else if (TARGET_XOP)
{
op_true = force_reg (mode, op_true);
Jakub