This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[gomp-nvptx 6/7] nvptx backend: change mul.u32 to mul.lo.u32
- From: Alexander Monakov <amonakov at ispras dot ru>
- To: gcc-patches at gcc dot gnu dot org
- Date: Fri, 18 Mar 2016 20:48:46 +0300
- Subject: [gomp-nvptx 6/7] nvptx backend: change mul.u32 to mul.lo.u32
- Authentication-results: sourceware.org; auth=none
- References: <1458323327-9908-1-git-send-email-amonakov at ispras dot ru>
Recent testing uncovered that the PTX JIT may reject 'mul.u32' when it is used
as a non-widening 32-bit multiply instruction: integer 'mul' in PTX takes a
mode qualifier ('.hi', '.lo' or '.wide'). Use 'mul.lo.u32' to fix 32-bit
code generation and conform to the PTX spec better.
* config/nvptx/nvptx.c (nvptx_init_unisimt_predicate): Emit
'mul.lo.u32' instead of 'mul.u32' for 32-bit ABI target.
(nvptx_declare_function_name): Ditto.
---
gcc/ChangeLog.gomp-nvptx | 6 ++++++
gcc/config/nvptx/nvptx.c | 4 ++--
2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 93bf781..bc187ea 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -955,7 +955,7 @@ nvptx_init_unisimt_predicate (FILE *file)
fprintf (file, "\t\t.reg.u%d %%ustmp2;\n", bits);
fprintf (file, "\t\tmov.u32 %%ustmp0, %%tid.y;\n");
fprintf (file, "\t\tmul%s.u32 %%ustmp1, %%ustmp0, 4;\n",
- bits == 64 ? ".wide" : "");
+ bits == 64 ? ".wide" : ".lo");
fprintf (file, "\t\tmov.u%d %%ustmp2, __nvptx_uni;\n", bits);
fprintf (file, "\t\tadd.u%d %%ustmp2, %%ustmp2, %%ustmp1;\n", bits);
fprintf (file, "\t\tld.shared.u32 %%r%d, [%%ustmp2];\n", master);
@@ -1115,7 +1115,7 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
fprintf (file, "\t.reg.u%d %%fstmp2;\n", bits);
fprintf (file, "\tmov.u32 %%fstmp0, %%tid.y;\n");
fprintf (file, "\tmul%s.u32 %%fstmp1, %%fstmp0, %d;\n",
- bits == 64 ? ".wide" : "", bits / 8);
+ bits == 64 ? ".wide" : ".lo", bits / 8);
fprintf (file, "\tmov.u%d %%fstmp2, __nvptx_stacks;\n", bits);
/* fstmp2 = &__nvptx_stacks[tid.y]; */
fprintf (file, "\tadd.u%d %%fstmp2, %%fstmp2, %%fstmp1;\n", bits);