This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] nvptx: do not use alternative spelling of unsigned comparisons


Hello!

The following patch fixes subtle breakage on NVPTX when unsigned comparisons
would be sometimes mistranslated by PTX JIT.  The new test demonstrates that
by using the %nctaid.x (number of blocks) register, but a comparison against
a value in constant memory can also trigger that (I could easily reproduce
that with NVCC, but not with GCC yet).  OK for trunk?

When emitting unsigned comparisons, nvptx backend uses alternative spellings
lo/ls/hs/hi for relational operators, instead of the usual lt/le/ge/gt,
respectively.  There's no nominal difference, but in practice ptxas and PTX
JIT can mistranslate the former in some circumstances, e.g. when the first
comparison operand is a load from constant memory (perhaps implicitely like
%nctaid.x).

This was reported to NVIDIA, but the workaround on GCC side is to simply
use the same common spellings as for signed comparisons, so let's do that.

Alexander

gcc/ChangeLog:
	* config/nvptx/nvptx.c (nvptx_print_operand): Treat LEU, GEU, LTU, GTU
        like LE, GE, LT, GT when emitting relational operator.

gcc/testsuite/ChangeLog:
	* gcc.target/nvptx/unsigned-cmp.c: New test.
---

diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 27c797b..efd0f8e 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -2161,28 +2161,20 @@ nvptx_print_operand (FILE *file, rtx x, int code)
 	    fputs (".ne", file);
 	  break;
 	case LE:
+	case LEU:
 	  fputs (".le", file);
 	  break;
 	case GE:
+	case GEU:
 	  fputs (".ge", file);
 	  break;
 	case LT:
+	case LTU:
 	  fputs (".lt", file);
 	  break;
 	case GT:
-	  fputs (".gt", file);
-	  break;
-	case LEU:
-	  fputs (".ls", file);
-	  break;
-	case GEU:
-	  fputs (".hs", file);
-	  break;
-	case LTU:
-	  fputs (".lo", file);
-	  break;
 	case GTU:
-	  fputs (".hi", file);
+	  fputs (".gt", file);
 	  break;
 	case LTGT:
 	  fputs (".ne", file);
diff --git a/gcc/testsuite/gcc.target/nvptx/unsigned-cmp.c b/gcc/testsuite/gcc.target/nvptx/unsigned-cmp.c
new file mode 100644
index 0000000..a0cf5c2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/unsigned-cmp.c
@@ -0,0 +1,50 @@
+/* { dg-options "-O2" } */
+/* { dg-do run } */
+
+/* nvptx backend used to emit lo/ls/hs/hi suffixes on unsigned comparison
+   insns instead of the more common lt/le/ge/gt, but ptxas and PTX JIT
+   miscompile 'ls' and 'hi' under some circumstances, such as when the first
+   source operand expands to a constant memory load, as demonstrated below.
+   Reported as NVIDIA bug ID 1725195 (tracker is not public).  */
+
+/* Define this to observe PTX translation breakage.  */
+//#define EMIT_BROKEN_ASM 1
+
+/* Or define this to get expected codegen.  */
+//#define EMIT_WORKING_ASM 1
+
+static __attribute__((noinline,noclone)) int ls(unsigned a)
+{
+    unsigned v;
+    /* %nctaid.x is always 1 in gcc testing.  */
+    asm ("mov.u32 %0, %%nctaid.x;" : "=r"(v));
+#if defined(EMIT_BROKEN_ASM)
+    asm ("set.u32.ls.u32 %0, %1, %0;" : "+r"(a) : "r"(v));
+#elif defined(EMIT_WORKING_ASM)
+    asm ("set.u32.le.u32 %0, %1, %0;" : "+r"(a) : "r"(v));
+#else
+    a = v <= a ? -1 : 0;
+#endif
+    return a;
+}
+static __attribute__((noinline,noclone)) int hi(unsigned a)
+{
+    unsigned v;
+    asm ("mov.u32 %0, %%nctaid.x;" : "=r"(v));
+#if defined(EMIT_BROKEN_ASM)
+    asm ("set.u32.hi.u32 %0, %1, %0;" : "+r"(a) : "r"(v));
+#elif defined(EMIT_WORKING_ASM)
+    asm ("set.u32.gt.u32 %0, %1, %0;" : "+r"(a) : "r"(v));
+#else
+    a = v > a ? -1 : 0;
+#endif
+    return a;
+}
+int main()
+{
+    int i;
+    for (i=0; i<3; i++)
+        if (ls(i) != -(1 <= i) || hi(i) != -(1 > i))
+            __builtin_abort();
+    return 0;
+}


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]