This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Actually fix libhsail-rt build on x86_64/i?86 32-bit


Hi!

I've noticed a bug in libhsail-rt/configure.tgt - those files are actually
pure shell script, not autoconf templates (they are just sourced from
the autoconf templates as well as configure scripts generated by autoconf),
so [[ ]] doesn't really work there.

After fixing that I've noticed that libhsail-rt actually doesn't build at
all as 32-bit.  The most important reason is that the code uses __int128_t
and __uint128_t, which is not supported on ILP32.  While computing the
saturating arithmetics that way is probably efficient for 8-bit and 16-bit,
I believe even on x86_64 it is unnecessarily slow for 32-bit and very slow
for 64-bit SAT computations, so I rewrote those to use the overflow
builtins, which should emit efficient code.  Do you have some sufficient
testsuite to test those?  I also don't understand the 
uint8_t __hsail_sat_sub_u8 (uint8_t a, uint8_t b)
{
  int16_t c = (uint16_t) a - (uint16_t) b;
  if (c < 0)
    return 0;
  else if (c > UINT8_MAX)
    return UINT8_MAX;
  else
    return c;
}
How could subtraction of unsigned values ever yield a value larger than
the unsigned maximum?  For a and b in [0, UINT8_MAX] a - b with infinite
precision is in the range [ -UINT8_MAX, UINT8_MAX ], so only the if (c < 0)
return 0; else return c; makes sense there to me.
The rest of the patch are workaround for pointer to int and int to pointer
conversion warning and the last one is for stripping away volatile
qualifier.  Haven't tested whether it actually works then (but I only see
dg-do compile tests for brig anyway).

OT, should we ask for a DWARF language code for BRIG?  Shall it be
DW_LANG_hsail or DW_LANG_brig, something else?

2017-01-30  Jakub Jelinek  <jakub@redhat.com>

	* configure.tgt: Fix i?86-*-linux* entry.
	* rt/sat_arithmetic.c (__hsail_sat_add_u32, __hsail_sat_add_u64,
	__hsail_sat_add_s32, __hsail_sat_add_s64): Use __builtin_add_overflow.
	(__hsail_sat_sub_u8, __hsail_sat_sub_u16): Remove pointless for overflow
	over maximum.
	(__hsail_sat_sub_u32, __hsail_sat_sub_u64, __hsail_sat_sub_s32,
	__hsail_sat_sub_s64): Use __builtin_sub_overflow.
	(__hsail_sat_mul_u32, __hsail_sat_mul_u64, __hsail_sat_mul_s32,
	__hsail_sat_mul_s64): Use __builtin_mul_overflow.
	* rt/arithmetic.c (__hsail_borrow_u32, __hsail_borrow_u64): Use
	__builtin_sub_overflow_p.
	(__hsail_carry_u32, __hsail_carry_u64): Use __builtin_add_overflow_p.
	* rt/misc.c (__hsail_groupbaseptr, __hsail_kernargbaseptr_u64):
	Cast pointers to uintptr_t first before casting to some other integral
	type.
	* rt/segment.c (__hsail_segmentp_private, __hsail_segmentp_group): Likewise.
	* rt/queue.c (__hsail_ldqueuereadindex, __hsail_ldqueuewriteindex,
	__hsail_addqueuewriteindex, __hsail_casqueuewriteindex,
	__hsail_stqueuereadindex, __hsail_stqueuewriteindex): Cast integral value
	to uintptr_t first before casting to pointer.
	* rt/workitems.c (__hsail_alloca_pop_frame): Cast memcpy first argument to
	void * to avoid warning.

--- libhsail-rt/configure.tgt.jj	2017-01-30 09:31:51.000000000 +0100
+++ libhsail-rt/configure.tgt	2017-01-30 09:35:04.402926654 +0100
@@ -28,9 +28,7 @@
 # broken systems. Currently it has been tested only on x86_64 Linux
 # of the upstream gcc targets. More targets shall be added after testing.
 case "${target}" in
-  i[[3456789]]86-*linux*)
-    ;;
-  x86_64-*-linux*)
+  x86_64-*-linux* | i?86-*-linux*)
     ;;
     *)
     UNSUPPORTED=1
--- libhsail-rt/rt/sat_arithmetic.c.jj	2017-01-26 09:17:46.000000000 +0100
+++ libhsail-rt/rt/sat_arithmetic.c	2017-01-30 10:27:27.861325330 +0100
@@ -49,21 +49,18 @@ __hsail_sat_add_u16 (uint16_t a, uint16_
 uint32_t
 __hsail_sat_add_u32 (uint32_t a, uint32_t b)
 {
-  uint64_t c = (uint64_t) a + (uint64_t) b;
-  if (c > UINT32_MAX)
+  uint32_t c;
+  if (__builtin_add_overflow (a, b, &c))
     return UINT32_MAX;
-  else
-    return c;
+  return c;
 }
 
 uint64_t
 __hsail_sat_add_u64 (uint64_t a, uint64_t b)
 {
-  __uint128_t c = (__uint128_t) a + (__uint128_t) b;
-  if (c > UINT64_MAX)
+  uint64_t c;
+  if (__builtin_add_overflow (a, b, &c))
     return UINT64_MAX;
-  else
-    return c;
 }
 
 int8_t
@@ -93,25 +90,19 @@ __hsail_sat_add_s16 (int16_t a, int16_t
 int32_t
 __hsail_sat_add_s32 (int32_t a, int32_t b)
 {
-  int64_t c = (int64_t) a + (int64_t) b;
-  if (c > INT32_MAX)
-    return INT32_MAX;
-  else if (c < INT32_MIN)
-    return INT32_MIN;
-  else
-    return c;
+  int32_t c;
+  if (__builtin_add_overflow (a, b, &c))
+    return b < 0 ? INT32_MIN : INT32_MAX;
+  return c;
 }
 
 int64_t
 __hsail_sat_add_s64 (int64_t a, int64_t b)
 {
-  __int128_t c = (__int128_t) a + (__int128_t) b;
-  if (c > INT64_MAX)
-    return INT64_MAX;
-  else if (c < INT64_MIN)
-    return INT64_MIN;
-  else
-    return c;
+  int64_t c;
+  if (__builtin_add_overflow (a, b, &c))
+    return b < 0 ? INT64_MIN : INT64_MAX;
+  return c;
 }
 
 uint8_t
@@ -120,8 +111,6 @@ __hsail_sat_sub_u8 (uint8_t a, uint8_t b
   int16_t c = (uint16_t) a - (uint16_t) b;
   if (c < 0)
     return 0;
-  else if (c > UINT8_MAX)
-    return UINT8_MAX;
   else
     return c;
 }
@@ -132,8 +121,6 @@ __hsail_sat_sub_u16 (uint16_t a, uint16_
   int32_t c = (uint32_t) a - (uint32_t) b;
   if (c < 0)
     return 0;
-  else if (c > UINT16_MAX)
-    return UINT16_MAX;
   else
     return c;
 }
@@ -141,25 +128,19 @@ __hsail_sat_sub_u16 (uint16_t a, uint16_
 uint32_t
 __hsail_sat_sub_u32 (uint32_t a, uint32_t b)
 {
-  int64_t c = (uint64_t) a - (uint64_t) b;
-  if (c < 0)
+  uint32_t c;
+  if (__builtin_sub_overflow (a, b, &c))
     return 0;
-  else if (c > UINT32_MAX)
-    return UINT32_MAX;
-  else
-    return c;
+  return c;
 }
 
 uint64_t
 __hsail_sat_sub_u64 (uint64_t a, uint64_t b)
 {
-  __int128_t c = (__uint128_t) a - (__uint128_t) b;
-  if (c < 0)
+  uint64_t c;
+  if (__builtin_sub_overflow (a, b, &c))
     return 0;
-  else if (c > UINT64_MAX)
-    return UINT64_MAX;
-  else
-    return c;
+  return c;
 }
 
 int8_t
@@ -189,25 +170,19 @@ __hsail_sat_sub_s16 (int16_t a, int16_t
 int32_t
 __hsail_sat_sub_s32 (int32_t a, int32_t b)
 {
-  int64_t c = (int64_t) a - (int64_t) b;
-  if (c > INT32_MAX)
-    return INT32_MAX;
-  else if (c < INT32_MIN)
-    return INT32_MIN;
-  else
-    return c;
+  int32_t c;
+  if (__builtin_sub_overflow (a, b, &c))
+    return b < 0 ? INT32_MAX : INT32_MIN;
+  return c;
 }
 
 int64_t
 __hsail_sat_sub_s64 (int64_t a, int64_t b)
 {
-  __int128_t c = (__int128_t) a - (__int128_t) b;
-  if (c > INT64_MAX)
-    return INT64_MAX;
-  else if (c < INT64_MIN)
-    return INT64_MIN;
-  else
-    return c;
+  int64_t c;
+  if (__builtin_sub_overflow (a, b, &c))
+    return b < 0 ? INT64_MAX : INT64_MIN;
+  return c;
 }
 
 uint8_t
@@ -233,21 +208,19 @@ __hsail_sat_mul_u16 (uint16_t a, uint16_
 uint32_t
 __hsail_sat_mul_u32 (uint32_t a, uint32_t b)
 {
-  uint64_t c = (uint64_t) a * (uint64_t) b;
-  if (c > UINT32_MAX)
+  uint32_t c;
+  if (__builtin_mul_overflow (a, b, &c))
     return UINT32_MAX;
-  else
-    return c;
+  return c;
 }
 
 uint64_t
 __hsail_sat_mul_u64 (uint64_t a, uint64_t b)
 {
-  __uint128_t c = (__uint128_t) a * (__uint128_t) b;
-  if (c > UINT64_MAX)
+  uint64_t c;
+  if (__builtin_mul_overflow (a, b, &c))
     return UINT64_MAX;
-  else
-    return c;
+  return c;
 }
 
 int8_t
@@ -277,23 +250,17 @@ __hsail_sat_mul_s16 (int16_t a, int16_t
 int32_t
 __hsail_sat_mul_s32 (int32_t a, int32_t b)
 {
-  int64_t c = (int64_t) a * (int64_t) b;
-  if (c > INT32_MAX)
-    return INT32_MAX;
-  else if (c < INT32_MIN)
-    return INT32_MIN;
-  else
-    return c;
+  int32_t c;
+  if (__builtin_mul_overflow (a, b, &c))
+    return ((a > 0) ^ (b > 0)) ? INT32_MIN : INT32_MAX;
+  return c;
 }
 
 int64_t
 __hsail_sat_mul_s64 (int64_t a, int64_t b)
 {
-  __int128_t c = (__int128_t) a * (__int128_t) b;
-  if (c > INT64_MAX)
-    return INT64_MAX;
-  else if (c < INT64_MIN)
-    return INT64_MIN;
-  else
-    return c;
+  int64_t c;
+  if (__builtin_mul_overflow (a, b, &c))
+    return ((a > 0) ^ (b > 0)) ? INT64_MIN : INT64_MAX;
+  return c;
 }
--- libhsail-rt/rt/arithmetic.c.jj	2017-01-26 09:17:46.000000000 +0100
+++ libhsail-rt/rt/arithmetic.c	2017-01-30 10:35:44.935726056 +0100
@@ -376,41 +376,25 @@ __hsail_ftz_f64 (double a)
 uint32_t
 __hsail_borrow_u32 (uint32_t a, uint32_t b)
 {
-  uint64_t c = (uint64_t) a - (uint64_t) b;
-  if (c > UINT32_MAX)
-    return 1;
-  else
-    return 0;
+  return __builtin_sub_overflow_p (a, b, a);
 }
 
 uint64_t
 __hsail_borrow_u64 (uint64_t a, uint64_t b)
 {
-  __uint128_t c = (__uint128_t) a - (__uint128_t) b;
-  if (c > UINT64_MAX)
-    return 1;
-  else
-    return 0;
+  return __builtin_sub_overflow_p (a, b, a);
 }
 
 uint32_t
 __hsail_carry_u32 (uint32_t a, uint32_t b)
 {
-  uint64_t c = (uint64_t) a + (uint64_t) b;
-  if (c > UINT32_MAX)
-    return 1;
-  else
-    return 0;
+  return __builtin_add_overflow_p (a, b, a);
 }
 
 uint64_t
 __hsail_carry_u64 (uint64_t a, uint64_t b)
 {
-  __uint128_t c = (__uint128_t) a + (__uint128_t) b;
-  if (c > UINT64_MAX)
-    return 1;
-  else
-    return 0;
+  return __builtin_add_overflow_p (a, b, a);
 }
 
 float
--- libhsail-rt/rt/misc.c.jj	2017-01-26 09:17:46.000000000 +0100
+++ libhsail-rt/rt/misc.c	2017-01-30 10:09:36.340473728 +0100
@@ -68,8 +68,8 @@ __hsail_debugtrap (uint32_t src, PHSAWor
 uint32_t
 __hsail_groupbaseptr (PHSAWorkItem *wi)
 {
-  return (uint32_t) (uint64_t) (wi->wg->group_base_ptr
-				- wi->launch_data->group_segment_start_addr);
+  return (uint32_t) (uintptr_t) (wi->wg->group_base_ptr
+				 - wi->launch_data->group_segment_start_addr);
 }
 
 uint64_t
@@ -77,7 +77,7 @@ __hsail_kernargbaseptr_u64 (PHSAWorkItem
 {
   /* For now assume only a single kernarg allocation at a time.
      Proper kernarg memory management to do.  */
-  return (uint64_t) wi->launch_data->kernarg_addr;
+  return (uint64_t) (uintptr_t) wi->launch_data->kernarg_addr;
 }
 
 uint32_t
--- libhsail-rt/rt/segment.c.jj	2017-01-26 09:17:46.000000000 +0100
+++ libhsail-rt/rt/segment.c	2017-01-30 10:12:15.430371604 +0100
@@ -32,9 +32,10 @@ __hsail_segmentp_private (uint64_t flat_
   if (flat_addr == 0)
     return 1;
   else
-    return (void *) flat_addr >= wi->wg->private_base_ptr
-	   && (void *) flat_addr
-		< wi->wg->private_base_ptr + wi->wg->private_segment_total_size;
+    return ((void *) (uintptr_t) flat_addr >= wi->wg->private_base_ptr
+	    && ((void *) (uintptr_t) flat_addr
+		< (wi->wg->private_base_ptr
+		   + wi->wg->private_segment_total_size)));
 }
 
 uint32_t
@@ -43,9 +44,10 @@ __hsail_segmentp_group (uint64_t flat_ad
   if (flat_addr == 0)
     return 1;
   else
-    return (void *) flat_addr >= wi->wg->group_base_ptr
-	   && (void *) flat_addr < wi->wg->group_base_ptr
-				     + wi->launch_data->dp->group_segment_size;
+    return ((void *) (uintptr_t) flat_addr >= wi->wg->group_base_ptr
+	    && ((void *) (uintptr_t) flat_addr
+		< (wi->wg->group_base_ptr
+		   + wi->launch_data->dp->group_segment_size)));
 }
 
 uint32_t
--- libhsail-rt/rt/queue.c.jj	2017-01-26 09:17:46.000000000 +0100
+++ libhsail-rt/rt/queue.c	2017-01-30 10:13:16.903560011 +0100
@@ -29,21 +29,21 @@
 uint64_t
 __hsail_ldqueuereadindex (uint64_t queue_addr)
 {
-  phsa_queue_t *queue = (phsa_queue_t *) queue_addr;
+  phsa_queue_t *queue = (phsa_queue_t *) (uintptr_t) queue_addr;
   return queue->read_index;
 }
 
 uint64_t
 __hsail_ldqueuewriteindex (uint64_t queue_addr)
 {
-  phsa_queue_t *queue = (phsa_queue_t *) queue_addr;
+  phsa_queue_t *queue = (phsa_queue_t *) (uintptr_t) queue_addr;
   return queue->write_index;
 }
 
 uint64_t
 __hsail_addqueuewriteindex (uint64_t queue_addr, uint64_t value)
 {
-  phsa_queue_t *queue = (phsa_queue_t *) queue_addr;
+  phsa_queue_t *queue = (phsa_queue_t *) (uintptr_t) queue_addr;
   return __sync_fetch_and_add (&queue->write_index, value);
 }
 
@@ -51,7 +51,7 @@ uint64_t
 __hsail_casqueuewriteindex (uint64_t queue_addr, uint64_t cmp_value,
 				   uint64_t new_value)
 {
-  phsa_queue_t *queue = (phsa_queue_t *) queue_addr;
+  phsa_queue_t *queue = (phsa_queue_t *) (uintptr_t) queue_addr;
   return __sync_val_compare_and_swap (&queue->write_index, cmp_value,
 				      new_value);
 }
@@ -59,13 +59,13 @@ __hsail_casqueuewriteindex (uint64_t que
 void
 __hsail_stqueuereadindex (uint64_t queue_addr, uint64_t value)
 {
-  phsa_queue_t *queue = (phsa_queue_t *) queue_addr;
+  phsa_queue_t *queue = (phsa_queue_t *) (uintptr_t) queue_addr;
   queue->read_index = value;
 }
 
 void
 __hsail_stqueuewriteindex (uint64_t queue_addr, uint64_t value)
 {
-  phsa_queue_t *queue = (phsa_queue_t *) queue_addr;
+  phsa_queue_t *queue = (phsa_queue_t *) (uintptr_t) queue_addr;
   queue->write_index = value;
 }
--- libhsail-rt/rt/workitems.c.jj	2017-01-26 09:17:46.000000000 +0100
+++ libhsail-rt/rt/workitems.c	2017-01-30 10:14:53.901279408 +0100
@@ -938,7 +938,7 @@ __hsail_alloca_pop_frame (PHSAWorkItem *
   volatile PHSAWorkGroup *wg = wi->wg;
 
   wg->alloca_stack_p = wg->alloca_frame_p;
-  memcpy (&wg->alloca_frame_p,
+  memcpy ((void *) &wg->alloca_frame_p,
 	  (const void *) (wg->private_base_ptr + wg->alloca_frame_p), 4);
   /* Now frame_p points to the beginning of the previous function's
      frame and stack_p to its end.  */

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]