[RFC patch, i386]: Use STV pass to load/store any TImode constant using SSE insns

Uros Bizjak ubizjak@gmail.com
Wed Apr 27 19:59:00 GMT 2016


Hello!

This RFC patch illustrates the idea of using the STV pass to load/store
any TImode constant using SSE insns. The testcase:

--cut here--
__int128 x;

/* Store to X a TImode constant whose value fits in the low 64 bits,
   so the upper 64 bits of the stored value are zero.
   NOTE(review): declared to return __int128 but contains no return
   statement; only the store to X is of interest here.  */
__int128 test_1 (void)
{
  x = (__int128) 0x00112233;
}

/* Store to X a TImode constant that is a 64-bit value shifted left
   by 64, so the lower 64 bits of the stored value are zero.
   NOTE(review): declared to return __int128 but contains no return
   statement; only the store to X is of interest here.  */
__int128 test_2 (void)
{
  x = ((__int128) 0x0011223344556677 << 64);
}

/* Store to X a TImode constant with the same non-zero 64-bit value
   in both its upper and lower halves.
   NOTE(review): declared to return __int128 but contains no return
   statement; only the store to X is of interest here.  */
__int128 test_3 (void)
{
  x = ((__int128) 0x0011223344556677 << 64) + (__int128) 0x0011223344556677;
}
--cut here--

currently compiles (-O2) on x86_64 to:

test_1:
        movq    $1122867, x(%rip)
        movq    $0, x+8(%rip)
        ret

test_2:
        xorl    %eax, %eax
        movabsq $4822678189205111, %rdx
        movq    %rax, x(%rip)
        movq    %rdx, x+8(%rip)
        ret

test_3:
        movabsq $4822678189205111, %rax
        movabsq $4822678189205111, %rdx
        movq    %rax, x(%rip)
        movq    %rdx, x+8(%rip)
        ret

However, using the attached patch, we compile all tests to:

test:
        movdqa  .LC0(%rip), %xmm0
        movaps  %xmm0, x(%rip)
        ret

Ilya, HJ - do you think the new sequences are better, or - as suggested
by Jakub - are they beneficial with the STV pass, as we are now able to
load any immediate value? A variant of this patch can also be used to
load DImode values in the 32-bit STV pass.

Uros.
-------------- next part --------------
Index: i386.c
===================================================================
--- i386.c	(revision 235526)
+++ i386.c	(working copy)
@@ -2854,29 +2854,16 @@ timode_scalar_to_vector_candidate_p (rtx_insn *ins
 
   if (MEM_P (dst))
     {
-      /* Check for store.  Only support store from register or standard
-	 SSE constants.  Memory must be aligned or unaligned store is
-	 optimal.  */
-      if (misaligned_operand (dst, TImode)
-	  && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
-	return false;
-
-      switch (GET_CODE (src))
-	{
-	default:
-	  return false;
-
-	case REG:
-	  return true;
-
-	case CONST_INT:
-	  return standard_sse_constant_p (src, TImode);
-	}
+      /* Check for store.  Memory must be aligned
+	 or unaligned store is optimal.  */
+      return ((REG_P (src) || CONST_SCALAR_INT_P (src))
+	      && (!misaligned_operand (dst, TImode)
+		  || TARGET_SSE_UNALIGNED_STORE_OPTIMAL));
     }
   else if (MEM_P (src))
     {
-      /* Check for load.  Memory must be aligned or unaligned load is
-	 optimal.  */
+      /* Check for load.  Memory must be aligned
+	 or unaligned load is optimal.  */
       return (REG_P (dst)
 	      && (!misaligned_operand (src, TImode)
 		  || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL));
@@ -3744,6 +3731,7 @@ timode_scalar_chain::convert_insn (rtx_insn *insn)
 	  PUT_MODE (XEXP (tmp, 0), V1TImode);
       }
       /* FALLTHRU */
+
     case MEM:
       PUT_MODE (dst, V1TImode);
       break;
@@ -3759,28 +3747,26 @@ timode_scalar_chain::convert_insn (rtx_insn *insn)
       PUT_MODE (src, V1TImode);
       break;
 
-    case CONST_INT:
-      switch (standard_sse_constant_p (src, TImode))
-	{
-	case 1:
-	  src = CONST0_RTX (GET_MODE (dst));
-	  break;
-	case 2:
-	  src = CONSTM1_RTX (GET_MODE (dst));
-	  break;
-	default:
-	  gcc_unreachable ();
-	}
-      if (NONDEBUG_INSN_P (insn))
-	{
-	  rtx tmp = gen_reg_rtx (V1TImode);
-	  /* Since there are no instructions to store standard SSE
-	     constant, temporary register usage is required.  */
-	  emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
-	  dst = tmp;
-	}
-      break;
+    CASE_CONST_SCALAR_INT:
+      {
+	rtx vec = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src));
 
+	if (NONDEBUG_INSN_P (insn))
+	  {
+	    rtx tmp = gen_reg_rtx (V1TImode);
+
+	    if (!standard_sse_constant_p (src, TImode))
+	      vec = validize_mem (force_const_mem (V1TImode, vec));
+
+	    /* We can only store from an SSE register.  */
+	    emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
+	    dst = tmp;
+	  }
+
+	src = vec;
+	break;
+      }
+  
     default:
       gcc_unreachable ();
     }
@@ -14784,8 +14770,7 @@ ix86_legitimate_constant_p (machine_mode mode, rtx
 #endif
       break;
 
-    case CONST_INT:
-    case CONST_WIDE_INT:
+    CASE_CONST_SCALAR_INT:
       switch (mode)
 	{
 	case TImode:
@@ -14823,10 +14808,7 @@ ix86_cannot_force_const_mem (machine_mode mode, rt
   /* We can always put integral constants and vectors in memory.  */
   switch (GET_CODE (x))
     {
-    case CONST_INT:
-    case CONST_WIDE_INT:
-    case CONST_DOUBLE:
-    case CONST_VECTOR:
+    CASE_CONST_ANY:
       return false;
 
     default:


More information about the Gcc-patches mailing list