[PATCH,rs6000] Implement -maltivec=be for vec_mule and vec_mulo Altivec intrinsics

Bill Schmidt wschmidt@linux.vnet.ibm.com
Mon Jan 13 23:37:00 GMT 2014


This patch provides for interpreting parity of element numbers for the
Altivec vec_mule and vec_mulo intrinsics as big-endian (left to right in
a vector register) when targeting a little endian machine and specifying
-maltivec=be.  New test cases are added to test this functionality on
all supported vector types.

The main change is in the altivec.md define_insns for
vec_widen_{su}mult_{even,odd}_{v8hi,v16qi}, where we now test for
VECTOR_ELT_ORDER_BIG rather than BYTES_BIG_ENDIAN in order to treat the
element order as big-endian.  However, this necessitates changes to
other places in altivec.md where we previously called
gen_vec_widen_{su}mult_*.  The semantics of these internal uses are not
affected by -maltivec=be, so these are now replaced with direct
generation of the underlying instructions that were previously
generated.

Bootstrapped and tested with no new regressions on
powerpc64{,le}-unknown-linux-gnu.  Ok for trunk?

Thanks,
Bill


gcc:

2014-01-13  Bill Schmidt  <wschmidt@vnet.linux.ibm.com>

	* config/rs6000/altivec.md (mulv8hi3): Explicitly generate vmulesh
	and vmulosh rather than call gen_vec_widen_smult_*.
	(vec_widen_umult_even_v16qi): Test VECTOR_ELT_ORDER_BIG rather
	than BYTES_BIG_ENDIAN to determine use of even or odd instruction.
	(vec_widen_smult_even_v16qi): Likewise.
	(vec_widen_umult_even_v8hi): Likewise.
	(vec_widen_smult_even_v8hi): Likewise.
	(vec_widen_umult_odd_v16qi): Likewise.
	(vec_widen_smult_odd_v16qi): Likewise.
	(vec_widen_umult_odd_v8hi): Likewise.
	(vec_widen_smult_odd_v8hi): Likewise.
	(vec_widen_umult_hi_v16qi): Explicitly generate vmuleub and
	vmuloub rather than call gen_vec_widen_umult_*.
	(vec_widen_umult_lo_v16qi): Likewise.
	(vec_widen_smult_hi_v16qi): Explicitly generate vmulesb and
	vmulosb rather than call gen_vec_widen_smult_*.
	(vec_widen_smult_lo_v16qi): Likewise.
	(vec_widen_umult_hi_v8hi): Explicitly generate vmuleuh and vmulouh
	rather than call gen_vec_widen_umult_*.
	(vec_widen_umult_lo_v8hi): Likewise.
	(vec_widen_smult_hi_v8hi): Explicitly gnerate vmulesh and vmulosh
	rather than call gen_vec_widen_smult_*.
	(vec_widen_smult_lo_v8hi): Likewise.

gcc/testsuite:

2014-01-13  Bill Schmidt  <wschmidt@vnet.linux.ibm.com>

	* gcc.dg/vmx/mult-even-odd.c: New.
	* gcc.dg/vmx/mult-even-odd-be-order.c: New.



Index: gcc/testsuite/gcc.dg/vmx/mult-even-odd.c
===================================================================
--- gcc/testsuite/gcc.dg/vmx/mult-even-odd.c	(revision 0)
+++ gcc/testsuite/gcc.dg/vmx/mult-even-odd.c	(revision 0)
@@ -0,0 +1,43 @@
+#include "harness.h"
+
+static void test()
+{
+  vector unsigned char vuca = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+  vector unsigned char vucb = {2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3};
+  vector signed char vsca = {-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7};
+  vector signed char vscb = {2,-3,2,-3,2,-3,2,-3,2,-3,2,-3,2,-3,2,-3};
+  vector unsigned short vusa = {0,1,2,3,4,5,6,7};
+  vector unsigned short vusb = {2,3,2,3,2,3,2,3};
+  vector signed short vssa = {-4,-3,-2,-1,0,1,2,3};
+  vector signed short vssb = {2,-3,2,-3,2,-3,2,-3};
+  vector unsigned short vuse, vuso;
+  vector signed short vsse, vsso;
+  vector unsigned int vuie, vuio;
+  vector signed int vsie, vsio;
+
+  vuse = vec_mule (vuca, vucb);
+  vuso = vec_mulo (vuca, vucb);
+  vsse = vec_mule (vsca, vscb);
+  vsso = vec_mulo (vsca, vscb);
+  vuie = vec_mule (vusa, vusb);
+  vuio = vec_mulo (vusa, vusb);
+  vsie = vec_mule (vssa, vssb);
+  vsio = vec_mulo (vssa, vssb);
+
+  check (vec_all_eq (vuse,
+		     ((vector unsigned short){0,4,8,12,16,20,24,28})),
+	 "vuse");
+  check (vec_all_eq (vuso,
+		     ((vector unsigned short){3,9,15,21,27,33,39,45})),
+	 "vuso");
+  check (vec_all_eq (vsse,
+		     ((vector signed short){-16,-12,-8,-4,0,4,8,12})),
+	 "vsse");
+  check (vec_all_eq (vsso,
+		     ((vector signed short){21,15,9,3,-3,-9,-15,-21})),
+	 "vsso");
+  check (vec_all_eq (vuie, ((vector unsigned int){0,4,8,12})), "vuie");
+  check (vec_all_eq (vuio, ((vector unsigned int){3,9,15,21})), "vuio");
+  check (vec_all_eq (vsie, ((vector signed int){-8,-4,0,4})), "vsie");
+  check (vec_all_eq (vsio, ((vector signed int){9,3,-3,-9})), "vsio");
+}
Index: gcc/testsuite/gcc.dg/vmx/mult-even-odd-be-order.c
===================================================================
--- gcc/testsuite/gcc.dg/vmx/mult-even-odd-be-order.c	(revision 0)
+++ gcc/testsuite/gcc.dg/vmx/mult-even-odd-be-order.c	(revision 0)
@@ -0,0 +1,64 @@
+/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mno-vsx" } */
+
+#include "harness.h"
+
+static void test()
+{
+  vector unsigned char vuca = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+  vector unsigned char vucb = {2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3};
+  vector signed char vsca = {-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7};
+  vector signed char vscb = {2,-3,2,-3,2,-3,2,-3,2,-3,2,-3,2,-3,2,-3};
+  vector unsigned short vusa = {0,1,2,3,4,5,6,7};
+  vector unsigned short vusb = {2,3,2,3,2,3,2,3};
+  vector signed short vssa = {-4,-3,-2,-1,0,1,2,3};
+  vector signed short vssb = {2,-3,2,-3,2,-3,2,-3};
+  vector unsigned short vuse, vuso;
+  vector signed short vsse, vsso;
+  vector unsigned int vuie, vuio;
+  vector signed int vsie, vsio;
+
+  vuse = vec_mule (vuca, vucb);
+  vuso = vec_mulo (vuca, vucb);
+  vsse = vec_mule (vsca, vscb);
+  vsso = vec_mulo (vsca, vscb);
+  vuie = vec_mule (vusa, vusb);
+  vuio = vec_mulo (vusa, vusb);
+  vsie = vec_mule (vssa, vssb);
+  vsio = vec_mulo (vssa, vssb);
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+  check (vec_all_eq (vuse,
+		     ((vector unsigned short){3,9,15,21,27,33,39,45})),
+	 "vuse");
+  check (vec_all_eq (vuso,
+		     ((vector unsigned short){0,4,8,12,16,20,24,28})),
+	 "vuso");
+  check (vec_all_eq (vsse,
+		     ((vector signed short){21,15,9,3,-3,-9,-15,-21})),
+	 "vsse");
+  check (vec_all_eq (vsso,
+		     ((vector signed short){-16,-12,-8,-4,0,4,8,12})),
+	 "vsso");
+  check (vec_all_eq (vuie, ((vector unsigned int){3,9,15,21})), "vuie");
+  check (vec_all_eq (vuio, ((vector unsigned int){0,4,8,12})), "vuio");
+  check (vec_all_eq (vsie, ((vector signed int){9,3,-3,-9})), "vsie");
+  check (vec_all_eq (vsio, ((vector signed int){-8,-4,0,4})), "vsio");
+#else
+  check (vec_all_eq (vuse,
+		     ((vector unsigned short){0,4,8,12,16,20,24,28})),
+	 "vuse");
+  check (vec_all_eq (vuso,
+		     ((vector unsigned short){3,9,15,21,27,33,39,45})),
+	 "vuso");
+  check (vec_all_eq (vsse,
+		     ((vector signed short){-16,-12,-8,-4,0,4,8,12})),
+	 "vsse");
+  check (vec_all_eq (vsso,
+		     ((vector signed short){21,15,9,3,-3,-9,-15,-21})),
+	 "vsso");
+  check (vec_all_eq (vuie, ((vector unsigned int){0,4,8,12})), "vuie");
+  check (vec_all_eq (vuio, ((vector unsigned int){3,9,15,21})), "vuio");
+  check (vec_all_eq (vsie, ((vector signed int){-8,-4,0,4})), "vsie");
+  check (vec_all_eq (vsio, ((vector signed int){9,3,-3,-9})), "vsio");
+#endif
+}
Index: gcc/config/rs6000/altivec.md
===================================================================
--- gcc/config/rs6000/altivec.md	(revision 206375)
+++ gcc/config/rs6000/altivec.md	(working copy)
@@ -673,17 +673,18 @@
    rtx high = gen_reg_rtx (V4SImode);
    rtx low = gen_reg_rtx (V4SImode);
 
-   emit_insn (gen_vec_widen_smult_even_v8hi (even, operands[1], operands[2]));
-   emit_insn (gen_vec_widen_smult_odd_v8hi (odd, operands[1], operands[2]));
-
    if (BYTES_BIG_ENDIAN)
      {
+       emit_insn (gen_altivec_vmulesh (even, operands[1], operands[2]));
+       emit_insn (gen_altivec_vmulosh (odd, operands[1], operands[2]));
        emit_insn (gen_altivec_vmrghw (high, even, odd));
        emit_insn (gen_altivec_vmrglw (low, even, odd));
        emit_insn (gen_altivec_vpkuwum (operands[0], high, low));
      }
    else
      {
+       emit_insn (gen_altivec_vmulosh (even, operands[1], operands[2]));
+       emit_insn (gen_altivec_vmulesh (odd, operands[1], operands[2]));
        emit_insn (gen_altivec_vmrghw (high, odd, even));
        emit_insn (gen_altivec_vmrglw (low, odd, even));
        emit_insn (gen_altivec_vpkuwum (operands[0], low, high));
@@ -981,7 +982,7 @@
    (use (match_operand:V16QI 2 "register_operand" ""))]
   "TARGET_ALTIVEC"
 {
-  if (BYTES_BIG_ENDIAN)
+  if (VECTOR_ELT_ORDER_BIG)
     emit_insn (gen_altivec_vmuleub (operands[0], operands[1], operands[2]));
   else
     emit_insn (gen_altivec_vmuloub (operands[0], operands[1], operands[2]));
@@ -994,7 +995,7 @@
    (use (match_operand:V16QI 2 "register_operand" ""))]
   "TARGET_ALTIVEC"
 {
-  if (BYTES_BIG_ENDIAN)
+  if (VECTOR_ELT_ORDER_BIG)
     emit_insn (gen_altivec_vmulesb (operands[0], operands[1], operands[2]));
   else
     emit_insn (gen_altivec_vmulosb (operands[0], operands[1], operands[2]));
@@ -1007,7 +1008,7 @@
    (use (match_operand:V8HI 2 "register_operand" ""))]
   "TARGET_ALTIVEC"
 {
-  if (BYTES_BIG_ENDIAN)
+  if (VECTOR_ELT_ORDER_BIG)
     emit_insn (gen_altivec_vmuleuh (operands[0], operands[1], operands[2]));
   else
     emit_insn (gen_altivec_vmulouh (operands[0], operands[1], operands[2]));
@@ -1020,7 +1021,7 @@
    (use (match_operand:V8HI 2 "register_operand" ""))]
   "TARGET_ALTIVEC"
 {
-  if (BYTES_BIG_ENDIAN)
+  if (VECTOR_ELT_ORDER_BIG)
     emit_insn (gen_altivec_vmulesh (operands[0], operands[1], operands[2]));
   else
     emit_insn (gen_altivec_vmulosh (operands[0], operands[1], operands[2]));
@@ -1033,7 +1034,7 @@
    (use (match_operand:V16QI 2 "register_operand" ""))]
   "TARGET_ALTIVEC"
 {
-  if (BYTES_BIG_ENDIAN)
+  if (VECTOR_ELT_ORDER_BIG)
     emit_insn (gen_altivec_vmuloub (operands[0], operands[1], operands[2]));
   else
     emit_insn (gen_altivec_vmuleub (operands[0], operands[1], operands[2]));
@@ -1046,7 +1047,7 @@
    (use (match_operand:V16QI 2 "register_operand" ""))]
   "TARGET_ALTIVEC"
 {
-  if (BYTES_BIG_ENDIAN)
+  if (VECTOR_ELT_ORDER_BIG)
     emit_insn (gen_altivec_vmulosb (operands[0], operands[1], operands[2]));
   else
     emit_insn (gen_altivec_vmulesb (operands[0], operands[1], operands[2]));
@@ -1059,7 +1060,7 @@
    (use (match_operand:V8HI 2 "register_operand" ""))]
   "TARGET_ALTIVEC"
 {
-  if (BYTES_BIG_ENDIAN)
+  if (VECTOR_ELT_ORDER_BIG)
     emit_insn (gen_altivec_vmulouh (operands[0], operands[1], operands[2]));
   else
     emit_insn (gen_altivec_vmuleuh (operands[0], operands[1], operands[2]));
@@ -1072,7 +1073,7 @@
    (use (match_operand:V8HI 2 "register_operand" ""))]
   "TARGET_ALTIVEC"
 {
-  if (BYTES_BIG_ENDIAN)
+  if (VECTOR_ELT_ORDER_BIG)
     emit_insn (gen_altivec_vmulosh (operands[0], operands[1], operands[2]));
   else
     emit_insn (gen_altivec_vmulesh (operands[0], operands[1], operands[2]));
@@ -2220,12 +2221,18 @@
   rtx ve = gen_reg_rtx (V8HImode);
   rtx vo = gen_reg_rtx (V8HImode);
   
-  emit_insn (gen_vec_widen_umult_even_v16qi (ve, operands[1], operands[2]));
-  emit_insn (gen_vec_widen_umult_odd_v16qi (vo, operands[1], operands[2]));
   if (BYTES_BIG_ENDIAN)
-    emit_insn (gen_altivec_vmrghh (operands[0], ve, vo));
+    {
+      emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2]));
+      emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2]));
+      emit_insn (gen_altivec_vmrghh (operands[0], ve, vo));
+    }
   else
-    emit_insn (gen_altivec_vmrghh (operands[0], vo, ve));
+    {
+      emit_insn (gen_altivec_vmuloub (ve, operands[1], operands[2]));
+      emit_insn (gen_altivec_vmuleub (vo, operands[1], operands[2]));
+      emit_insn (gen_altivec_vmrghh (operands[0], vo, ve));
+    }
   DONE;
 }")
 
@@ -2240,12 +2247,18 @@
   rtx ve = gen_reg_rtx (V8HImode);
   rtx vo = gen_reg_rtx (V8HImode);
   
-  emit_insn (gen_vec_widen_umult_even_v16qi (ve, operands[1], operands[2]));
-  emit_insn (gen_vec_widen_umult_odd_v16qi (vo, operands[1], operands[2]));
   if (BYTES_BIG_ENDIAN)
-    emit_insn (gen_altivec_vmrglh (operands[0], ve, vo));
+    {
+      emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2]));
+      emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2]));
+      emit_insn (gen_altivec_vmrglh (operands[0], ve, vo));
+    }
   else
-    emit_insn (gen_altivec_vmrglh (operands[0], vo, ve));
+    {
+      emit_insn (gen_altivec_vmuloub (ve, operands[1], operands[2]));
+      emit_insn (gen_altivec_vmuleub (vo, operands[1], operands[2]));
+      emit_insn (gen_altivec_vmrglh (operands[0], vo, ve));
+    }
   DONE;
 }")
 
@@ -2260,12 +2273,18 @@
   rtx ve = gen_reg_rtx (V8HImode);
   rtx vo = gen_reg_rtx (V8HImode);
   
-  emit_insn (gen_vec_widen_smult_even_v16qi (ve, operands[1], operands[2]));
-  emit_insn (gen_vec_widen_smult_odd_v16qi (vo, operands[1], operands[2]));
   if (BYTES_BIG_ENDIAN)
-    emit_insn (gen_altivec_vmrghh (operands[0], ve, vo));
+    {
+      emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2]));
+      emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2]));
+      emit_insn (gen_altivec_vmrghh (operands[0], ve, vo));
+    }
   else
-    emit_insn (gen_altivec_vmrghh (operands[0], vo, ve));
+    {
+      emit_insn (gen_altivec_vmulosb (ve, operands[1], operands[2]));
+      emit_insn (gen_altivec_vmulesb (vo, operands[1], operands[2]));
+      emit_insn (gen_altivec_vmrghh (operands[0], vo, ve));
+    }
   DONE;
 }")
 
@@ -2280,12 +2299,18 @@
   rtx ve = gen_reg_rtx (V8HImode);
   rtx vo = gen_reg_rtx (V8HImode);
   
-  emit_insn (gen_vec_widen_smult_even_v16qi (ve, operands[1], operands[2]));
-  emit_insn (gen_vec_widen_smult_odd_v16qi (vo, operands[1], operands[2]));
   if (BYTES_BIG_ENDIAN)
-    emit_insn (gen_altivec_vmrglh (operands[0], ve, vo));
+    {
+      emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2]));
+      emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2]));
+      emit_insn (gen_altivec_vmrglh (operands[0], ve, vo));
+    }
   else
-    emit_insn (gen_altivec_vmrglh (operands[0], vo, ve));
+    {
+      emit_insn (gen_altivec_vmulosb (ve, operands[1], operands[2]));
+      emit_insn (gen_altivec_vmulesb (vo, operands[1], operands[2]));
+      emit_insn (gen_altivec_vmrglh (operands[0], vo, ve));
+    }
   DONE;
 }")
 
@@ -2300,12 +2325,18 @@
   rtx ve = gen_reg_rtx (V4SImode);
   rtx vo = gen_reg_rtx (V4SImode);
   
-  emit_insn (gen_vec_widen_umult_even_v8hi (ve, operands[1], operands[2]));
-  emit_insn (gen_vec_widen_umult_odd_v8hi (vo, operands[1], operands[2]));
   if (BYTES_BIG_ENDIAN)
-    emit_insn (gen_altivec_vmrghw (operands[0], ve, vo));
+    {
+      emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2]));
+      emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2]));
+      emit_insn (gen_altivec_vmrghw (operands[0], ve, vo));
+    }
   else
-    emit_insn (gen_altivec_vmrghw (operands[0], vo, ve));
+    {
+      emit_insn (gen_altivec_vmulouh (ve, operands[1], operands[2]));
+      emit_insn (gen_altivec_vmuleuh (vo, operands[1], operands[2]));
+      emit_insn (gen_altivec_vmrghw (operands[0], vo, ve));
+    }
   DONE;
 }")
 
@@ -2320,12 +2351,18 @@
   rtx ve = gen_reg_rtx (V4SImode);
   rtx vo = gen_reg_rtx (V4SImode);
   
-  emit_insn (gen_vec_widen_umult_even_v8hi (ve, operands[1], operands[2]));
-  emit_insn (gen_vec_widen_umult_odd_v8hi (vo, operands[1], operands[2]));
   if (BYTES_BIG_ENDIAN)
-    emit_insn (gen_altivec_vmrglw (operands[0], ve, vo));
+    {
+      emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2]));
+      emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2]));
+      emit_insn (gen_altivec_vmrglw (operands[0], ve, vo));
+    }
   else
-    emit_insn (gen_altivec_vmrglw (operands[0], vo, ve));
+    {
+      emit_insn (gen_altivec_vmulouh (ve, operands[1], operands[2]));
+      emit_insn (gen_altivec_vmuleuh (vo, operands[1], operands[2]));
+      emit_insn (gen_altivec_vmrglw (operands[0], vo, ve));
+    }
   DONE;
 }")
 
@@ -2340,12 +2377,18 @@
   rtx ve = gen_reg_rtx (V4SImode);
   rtx vo = gen_reg_rtx (V4SImode);
   
-  emit_insn (gen_vec_widen_smult_even_v8hi (ve, operands[1], operands[2]));
-  emit_insn (gen_vec_widen_smult_odd_v8hi (vo, operands[1], operands[2]));
   if (BYTES_BIG_ENDIAN)
-    emit_insn (gen_altivec_vmrghw (operands[0], ve, vo));
+    {
+      emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2]));
+      emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2]));
+      emit_insn (gen_altivec_vmrghw (operands[0], ve, vo));
+    }
   else
-    emit_insn (gen_altivec_vmrghw (operands[0], vo, ve));
+    {
+      emit_insn (gen_altivec_vmulosh (ve, operands[1], operands[2]));
+      emit_insn (gen_altivec_vmulesh (vo, operands[1], operands[2]));
+      emit_insn (gen_altivec_vmrghw (operands[0], vo, ve));
+    }
   DONE;
 }")
 
@@ -2360,12 +2403,18 @@
   rtx ve = gen_reg_rtx (V4SImode);
   rtx vo = gen_reg_rtx (V4SImode);
   
-  emit_insn (gen_vec_widen_smult_even_v8hi (ve, operands[1], operands[2]));
-  emit_insn (gen_vec_widen_smult_odd_v8hi (vo, operands[1], operands[2]));
   if (BYTES_BIG_ENDIAN)
-    emit_insn (gen_altivec_vmrglw (operands[0], ve, vo));
+    {
+      emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2]));
+      emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2]));
+      emit_insn (gen_altivec_vmrglw (operands[0], ve, vo));
+    }
   else
-    emit_insn (gen_altivec_vmrglw (operands[0], vo, ve));
+    {
+      emit_insn (gen_altivec_vmulosh (ve, operands[1], operands[2]));
+      emit_insn (gen_altivec_vmulesh (vo, operands[1], operands[2]));
+      emit_insn (gen_altivec_vmrglw (operands[0], vo, ve));
+    }
   DONE;
 }")
 




More information about the Gcc-patches mailing list