This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH, x86] merge movsd/movhpd pair in peephole


Hi,

For the testcase 1.c

#include <emmintrin.h>

double a[1000];

__m128d foo1() {
  __m128d res;
  res = _mm_load_sd(&a[1]);
  res = _mm_loadh_pd(res, &a[2]);
  return res;
}

LLVM merges the movsd/movhpd pair into a single movupd, while GCC does
not. The merge is beneficial on x86 machines starting from Nehalem.

This patch adds the merging as peephole2 patterns.
Bootstrap and regression tests pass. Is it OK for stage1?

Thanks,
Wei.

gcc/ChangeLog:

2014-04-09  Wei Mi  <wmi@google.com>

        * config/i386/i386.c (get_memref_parts): New function.
        (adjacent_mem_locations): Ditto.
        * config/i386/i386-protos.h: Add decl for adjacent_mem_locations.
        * config/i386/sse.md: Add define_peephole2 patterns for load and store.

gcc/testsuite/ChangeLog:

2014-04-09  Wei Mi  <wmi@google.com>

        * gcc.target/i386/sse2-unaligned-mov.c: New test.

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 6e32978..3ae0d6d 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -312,6 +312,7 @@ extern enum attr_cpu ix86_schedule;
 #endif

 extern const char * ix86_output_call_insn (rtx insn, rtx call_op);
+extern bool adjacent_mem_locations (rtx mem1, rtx mem2);

 #ifdef RTX_CODE
 /* Target data for multipass lookahead scheduling.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 3eefe4a..a330e84 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -46737,6 +46737,70 @@ ix86_atomic_assign_expand_fenv (tree *hold,
tree *clear, tree *update)
                    atomic_feraiseexcept_call);
 }

+/* Try to determine BASE/OFFSET/SIZE parts of the given MEM.
+   Return true if all three were determined, false if any of them
+   could not be.
+
+   This function only looks for REG/SYMBOL or REG/SYMBOL+CONST
+   address forms. */
+
+static bool
+get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
+                 HOST_WIDE_INT *size)
+{
+  rtx addr_rtx;
+  if MEM_SIZE_KNOWN_P (mem)
+    *size = MEM_SIZE (mem);
+  else
+    return false;
+
+  if (GET_CODE (XEXP (mem, 0)) == CONST)
+    addr_rtx = XEXP (XEXP (mem, 0), 0);
+  else
+    addr_rtx = (XEXP (mem, 0));
+
+  if (GET_CODE (addr_rtx) == REG
+      || GET_CODE (addr_rtx) == SYMBOL_REF)
+    {
+      *base = addr_rtx;
+      *offset = 0;
+    }
+  else if (GET_CODE (addr_rtx) == PLUS
+          && CONST_INT_P (XEXP (addr_rtx, 1)))
+    {
+      *base = XEXP (addr_rtx, 0);
+      *offset = INTVAL (XEXP (addr_rtx, 1));
+    }
+  else
+    return false;
+
+  return true;
+}
+
+/* If MEM1 is adjacent to MEM2 and MEM1 has lower address,
+   return true.  */
+
+extern bool
+adjacent_mem_locations (rtx mem1, rtx mem2)
+{
+  rtx base1, base2;
+  HOST_WIDE_INT off1, size1, off2, size2;
+
+  if (get_memref_parts (mem1, &base1, &off1, &size1)
+      && get_memref_parts (mem2, &base2, &off2, &size2))
+    {
+      if (GET_CODE (base1) == SYMBOL_REF
+         && GET_CODE (base2) == SYMBOL_REF
+         && SYMBOL_REF_DECL (base1) == SYMBOL_REF_DECL (base2))
+        return (off1 + size1 == off2);
+      else if (REG_P (base1)
+              && REG_P (base2)
+              && REGNO (base1) == REGNO (base2))
+        return (off1 + size1 == off2);
+    }
+  return false;
+}
+
 /* Initialize the GCC target structure.  */
 #undef TARGET_RETURN_IN_MEMORY
 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 72a4d6d..4bf8461 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15606,3 +15606,37 @@
   [(set_attr "type" "sselog1")
    (set_attr "length_immediate" "1")
    (set_attr "mode" "TI")])
+
+;; merge movsd/movhpd to movupd when TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
+;; is true.
+(define_peephole2
+  [(set (match_operand:DF 0 "register_operand")
+       (match_operand:DF 1 "memory_operand"))
+   (set (match_operand:V2DF 2 "register_operand")
+       (vec_concat:V2DF (match_dup 0)
+        (match_operand:DF 3 "memory_operand")))]
+  "TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
+   && REGNO (operands[0]) == REGNO (operands[2])
+   && adjacent_mem_locations (operands[1], operands[3])"
+  [(set (match_dup 2)
+       (unspec:V2DF [(match_dup 4)] UNSPEC_LOADU))]
+{
+  operands[4] = gen_rtx_MEM (V2DFmode, XEXP(operands[1], 0));
+})
+
+;; merge movsd/movhpd to movupd when TARGET_SSE_UNALIGNED_STORE_OPTIMAL
+;; is true.
+(define_peephole2
+  [(set (match_operand:DF 0 "memory_operand")
+        (vec_select:DF (match_operand:V2DF 1 "register_operand")
+                      (parallel [(const_int 0)])))
+   (set (match_operand:DF 2 "memory_operand")
+        (vec_select:DF (match_dup 1)
+                       (parallel [(const_int 1)])))]
+  "TARGET_SSE_UNALIGNED_STORE_OPTIMAL
+   && adjacent_mem_locations (operands[0], operands[2])"
+  [(set (match_dup 3)
+        (unspec:V2DF [(match_dup 1)] UNSPEC_STOREU))]
+{
+  operands[3] = gen_rtx_MEM (V2DFmode, XEXP(operands[0], 0));
+})
diff --git a/gcc/testsuite/gcc.target/i386/sse2-unaligned-mov.c
b/gcc/testsuite/gcc.target/i386/sse2-unaligned-mov.c
new file mode 100644
index 0000000..28470ce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-unaligned-mov.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-mtune=corei7 -O2" } */
+
+#include <emmintrin.h>
+
+double a[1000];
+
+__m128d foo1() {
+  __m128d res;
+  res = _mm_load_sd(&a[1]);
+  res = _mm_loadh_pd(res, &a[2]);
+  return res;
+}
+
+void foo2(__m128d res) {
+  _mm_store_sd(&a[1], res);
+  _mm_storeh_pd(&a[2], res);
+}
+
+/* { dg-final { scan-assembler-times "movup" 2 } } */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]