[PATCH, i386]: Allow memory operands for btr/bts and btc (PR target/46091)

Uros Bizjak ubizjak@gmail.com
Wed Aug 16 16:49:00 GMT 2017


Hello!

The attached patch allows memory operands for the btr/bts and btc
instructions.  The previous comment was wrong: without the "lock"
prefix, these instructions do not enforce atomic operation on a
memory operand.  In their RMW form with an immediate operand, they
are not slower than the corresponding logic instructions.

(When a variable count operand is used with the RMW form, these
instructions are considerably slower, since they can address the full
32-bit address range from their base operand.)

2017-08-16  Uros Bizjak  <ubizjak@gmail.com>

    PR target/46091
    * config/i386/i386.md (*anddi_1_btr): Change predicates of
    operand 0 and operand 1 to nonimmediate_operand. Add "m" constraint.
    Add ix86_binary_operator_ok to insn condition.
    (*iordi_1_bts): Ditto.
    (*xordi_1_btc): Ditto.
    (*btsq): Change predicate of operand 0 to nonimmediate_operand.
    Update corresponding peephole2 pattern.
    (*btrq): Ditto.
    (*btcq): Ditto.

testsuite/ChangeLog:

2017-08-16  Uros Bizjak  <ubizjak@gmail.com>

    PR target/46091
    * gcc.target/i386/pr46091-1.c: Update scan-assembler-times.
    (testm): New test function.
    * gcc.target/i386/pr46091-2.c: Ditto.
    * gcc.target/i386/pr46091-3.c: Ditto.

Patch was bootstrapped and regression tested on x86_64-linux-gnu.

Committed to mainline SVN.

Uros.
-------------- next part --------------
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 059a51832de..2ad3ad7d216 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -8268,12 +8268,13 @@
    (set_attr "mode" "SI,DI,DI,SI")])
 
 (define_insn_and_split "*anddi_1_btr"
-  [(set (match_operand:DI 0 "register_operand" "=r")
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
 	(and:DI
-	 (match_operand:DI 1 "register_operand" "%0")
+	 (match_operand:DI 1 "nonimmediate_operand" "%0")
 	 (match_operand:DI 2 "const_int_operand" "n")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && TARGET_USE_BT
+   && ix86_binary_operator_ok (AND, DImode, operands)
    && IN_RANGE (exact_log2 (~INTVAL (operands[2])), 31, 63)"
   "#"
   "&& reload_completed"
@@ -8813,12 +8814,13 @@
    (set_attr "mode" "<MODE>")])
 
 (define_insn_and_split "*iordi_1_bts"
-  [(set (match_operand:DI 0 "register_operand" "=r")
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
 	(ior:DI
-	 (match_operand:DI 1 "register_operand" "%0")
+	 (match_operand:DI 1 "nonimmediate_operand" "%0")
 	 (match_operand:DI 2 "const_int_operand" "n")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && TARGET_USE_BT
+   && ix86_binary_operator_ok (IOR, DImode, operands)
    && IN_RANGE (exact_log2 (INTVAL (operands[2])), 31, 63)"
   "#"
   "&& reload_completed"
@@ -8834,12 +8836,13 @@
    (set_attr "mode" "DI")])
 
 (define_insn_and_split "*xordi_1_btc"
-  [(set (match_operand:DI 0 "register_operand" "=r")
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
 	(xor:DI
-	 (match_operand:DI 1 "register_operand" "%0")
+	 (match_operand:DI 1 "nonimmediate_operand" "%0")
 	 (match_operand:DI 2 "const_int_operand" "n")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && TARGET_USE_BT
+   && ix86_binary_operator_ok (XOR, DImode, operands)
    && IN_RANGE (exact_log2 (INTVAL (operands[2])), 31, 63)"
   "#"
   "&& reload_completed"
@@ -10996,10 +10999,10 @@
 ;; Bit set / bit test instructions
 
 ;; %%% bts, btr, btc, bt.
-;; In general these instructions are *slow* when applied to memory,
-;; since they enforce atomic operation.  When applied to registers,
-;; it depends on the cpu implementation.  They're never faster than
-;; the corresponding and/ior/xor operations, so with 32-bit there's
+;; In general these instructions are *slow* with variable operand
+;; when applied to memory.  When applied to registers, it depends
+;; on the cpu implementation.  They're never faster than the
+;; corresponding and/ior/xor operations, so with 32-bit there's
 ;; no point.  But in 64-bit, we can't hold the relevant immediates
 ;; within the instruction itself, so operating on bits in the high
 ;; 32-bits of a register becomes easier.
@@ -11009,7 +11012,7 @@
 ;; negdf respectively, so they can never be disabled entirely.
 
 (define_insn "*btsq"
-  [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r")
+  [(set (zero_extract:DI (match_operand:DI 0 "nonimmediate_operand" "+rm")
 			 (const_int 1)
 			 (match_operand 1 "const_0_to_63_operand" "J"))
 	(const_int 1))
@@ -11022,7 +11025,7 @@
    (set_attr "mode" "DI")])
 
 (define_insn "*btrq"
-  [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r")
+  [(set (zero_extract:DI (match_operand:DI 0 "nonimmediate_operand" "+rm")
 			 (const_int 1)
 			 (match_operand 1 "const_0_to_63_operand" "J"))
 	(const_int 0))
@@ -11035,7 +11038,7 @@
    (set_attr "mode" "DI")])
 
 (define_insn "*btcq"
-  [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r")
+  [(set (zero_extract:DI (match_operand:DI 0 "nonimmediate_operand" "+rm")
 			 (const_int 1)
 			 (match_operand 1 "const_0_to_63_operand" "J"))
 	(not:DI (zero_extract:DI (match_dup 0) (const_int 1) (match_dup 1))))
@@ -11052,7 +11055,7 @@
 (define_peephole2
   [(match_scratch:DI 2 "r")
    (parallel [(set (zero_extract:DI
-		     (match_operand:DI 0 "register_operand")
+		     (match_operand:DI 0 "nonimmediate_operand")
 		     (const_int 1)
 		     (match_operand 1 "const_0_to_63_operand"))
 		   (const_int 1))
@@ -11076,7 +11079,7 @@
 (define_peephole2
   [(match_scratch:DI 2 "r")
    (parallel [(set (zero_extract:DI
-		     (match_operand:DI 0 "register_operand")
+		     (match_operand:DI 0 "nonimmediate_operand")
 		     (const_int 1)
 		     (match_operand 1 "const_0_to_63_operand"))
 		   (const_int 0))
@@ -11100,7 +11103,7 @@
 (define_peephole2
   [(match_scratch:DI 2 "r")
    (parallel [(set (zero_extract:DI
-		     (match_operand:DI 0 "register_operand")
+		     (match_operand:DI 0 "nonimmediate_operand")
 		     (const_int 1)
 		     (match_operand 1 "const_0_to_63_operand"))
 	      (not:DI (zero_extract:DI
diff --git a/gcc/testsuite/gcc.target/i386/pr46091-1.c b/gcc/testsuite/gcc.target/i386/pr46091-1.c
index adca01f294c..74685af8184 100644
--- a/gcc/testsuite/gcc.target/i386/pr46091-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr46091-1.c
@@ -6,4 +6,11 @@ unsigned long long test (unsigned long long a)
   return a & ~(1ull << 55);
 }
 
-/* { dg-final { scan-assembler "btr" } } */
+extern unsigned long long m;
+
+void testm (void)
+{
+  m &= ~(1ull << 45);
+}
+
+/* { dg-final { scan-assembler-times "btr" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr46091-2.c b/gcc/testsuite/gcc.target/i386/pr46091-2.c
index 174375393cf..5b340450725 100644
--- a/gcc/testsuite/gcc.target/i386/pr46091-2.c
+++ b/gcc/testsuite/gcc.target/i386/pr46091-2.c
@@ -6,4 +6,11 @@ unsigned long long test (unsigned long long a)
   return a | (1ull << 55);
 }
 
-/* { dg-final { scan-assembler "bts" } } */
+extern unsigned long long m;
+
+void testm (void)
+{
+  m |= (1ull << 45);
+}
+
+/* { dg-final { scan-assembler-times "bts" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr46091-3.c b/gcc/testsuite/gcc.target/i386/pr46091-3.c
index c8091e9f41c..3c601a3c543 100644
--- a/gcc/testsuite/gcc.target/i386/pr46091-3.c
+++ b/gcc/testsuite/gcc.target/i386/pr46091-3.c
@@ -6,4 +6,11 @@ unsigned long long test (unsigned long long a)
   return a ^ (1ull << 55);
 }
 
-/* { dg-final { scan-assembler "btc" } } */
+extern unsigned long long m;
+
+void testm (void)
+{
+  m ^= (1ull << 45);
+}
+
+/* { dg-final { scan-assembler-times "btc" 2 } } */


More information about the Gcc-patches mailing list