This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[gomp-nvptx] nvptx backend: implement alloca with -msoft-stack
- From: Alexander Monakov <amonakov at ispras dot ru>
- To: gcc-patches at gcc dot gnu dot org
- Cc: Bernd Schmidt <bschmidt at redhat dot com>, Nathan Sidwell <nathan at acm dot org>
- Date: Mon, 14 Dec 2015 16:50:48 +0300
- Subject: [gomp-nvptx] nvptx backend: implement alloca with -msoft-stack
- Authentication-results: sourceware.org; auth=none
This patch implements variable stack allocation for alloca/VLA on NVPTX if
-msoft-stack is enabled. In addition to moving the stack pointer, we need to
copy the updated pointer into __nvptx_stacks[tid.y].
* config/nvptx/nvptx.c (nvptx_declare_function_name): Emit %outargs
using .local %outargs_ar only if not TARGET_SOFT_STACK. Emit %outargs
under TARGET_SOFT_STACK by offsetting from %frame.
(nvptx_get_drap_rtx): Return %argp as the DRAP if needed.
* config/nvptx/nvptx.md (nvptx_register_operand): Allow %outargs under
TARGET_SOFT_STACK.
(nvptx_nonimmediate_operand): Ditto.
(allocate_stack): Implement for TARGET_SOFT_STACK. Remove unused code.
(allocate_stack_<mode>): Remove unused pattern.
(set_softstack_insn): New pattern.
(restore_stack_block): Handle for TARGET_SOFT_STACK.
---
I have committed this patch to the gomp-nvptx branch. Bernd, Nathan, I would
appreciate it if you could comment on the 'define_predicate' changes in nvptx.md.
There are three predicates that start like this:
  if (REG_P (op))
    return !HARD_REGISTER_P (op);
  if (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op)))
    return false;
  if (GET_CODE (op) == SUBREG)
    return false;
For stack adjustments I need to allow operations on the stack pointer. For
now I've implemented that as a fairly straightforward shortcut, but I guess it
doesn't look very nice. What is the reason for rejecting "hard registers" there
in the first place? In any case, I'd like your input if you see a better way
to handle it.
Also, note that there's either a bug or a cleanup opportunity: the third "if"
statement is strictly more general than the second (it rejects every SUBREG,
not only a SUBREG of a MEM), so the second one is redundant as written.
No regressions in the check-c testsuite (with the 'alloca' effective-target enabled).
Thanks.
Alexander
diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index b12a7a8..599e460 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -883,7 +883,7 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
HOST_WIDE_INT sz = crtl->outgoing_args_size;
if (sz == 0)
sz = 1;
- if (cfun->machine->has_call_with_varargs)
+ if (!TARGET_SOFT_STACK && cfun->machine->has_call_with_varargs)
{
fprintf (file, "\t.reg.u%d %%outargs;\n"
"\t.local.align 8 .b8 %%outargs_ar["
@@ -897,7 +897,8 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
sz = get_frame_size ();
if (sz == 0 && cfun->machine->has_call_with_sc)
sz = 1;
- if (sz > 0)
+ bool need_sp = cfun->calls_alloca || cfun->machine->has_call_with_varargs;
+ if (sz > 0 || TARGET_SOFT_STACK && need_sp)
{
int alignment = crtl->stack_alignment_needed / BITS_PER_UNIT;
@@ -923,10 +924,15 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
if (alignment > keep_align)
fprintf (file, "\tand.b%d %%frame, %%frame, %d;\n",
bits, -alignment);
+ fprintf (file, "\t.reg.u%d %%outargs;\n", bits);
+ sz = crtl->outgoing_args_size;
+ gcc_assert (sz % keep_align == 0);
+ fprintf (file, "\tsub.u%d %%outargs, %%frame, "
+ HOST_WIDE_INT_PRINT_DEC ";\n", bits, sz);
/* crtl->is_leaf is not initialized because RA is not run. */
if (!leaf_function_p ())
{
- fprintf (file, "\tst.shared.u%d [%%fstmp2], %%frame;\n", bits);
+ fprintf (file, "\tst.shared.u%d [%%fstmp2], %%outargs;\n", bits);
cfun->machine->using_softstack = true;
}
need_softstack_decl = true;
@@ -996,6 +1002,8 @@ nvptx_function_ok_for_sibcall (tree, tree)
static rtx
nvptx_get_drap_rtx (void)
{
+ if (TARGET_SOFT_STACK && stack_realign_drap)
+ return arg_pointer_rtx;
return NULL_RTX;
}
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index ae1909d..130c809 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -69,6 +69,8 @@ (define_attr "divergent" "false,true"
(define_predicate "nvptx_register_operand"
(match_code "reg,subreg")
{
+ if (TARGET_SOFT_STACK && op == stack_pointer_rtx)
+ return true;
if (REG_P (op))
return !HARD_REGISTER_P (op);
if (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op)))
@@ -123,6 +125,8 @@ (define_predicate "nvptx_general_operand"
(define_predicate "nvptx_nonimmediate_operand"
(match_code "reg,subreg,mem")
{
+ if (TARGET_SOFT_STACK && op == stack_pointer_rtx)
+ return true;
if (REG_P (op))
return (op != frame_pointer_rtx
&& op != arg_pointer_rtx
@@ -1061,31 +1065,41 @@ (define_expand "allocate_stack"
(match_operand 1 "nvptx_register_operand")]
""
{
+ if (TARGET_SOFT_STACK)
+ {
+ emit_move_insn (stack_pointer_rtx,
+ gen_rtx_MINUS (Pmode, stack_pointer_rtx, operands[1]));
+ emit_insn (gen_set_softstack_insn (stack_pointer_rtx));
+ emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
+ DONE;
+ }
/* The ptx documentation specifies an alloca intrinsic (for 32 bit
only) but notes it is not implemented. The assembler emits a
confused error message. Issue a blunt one now instead. */
sorry ("target cannot support alloca.");
emit_insn (gen_nop ());
DONE;
- if (TARGET_ABI64)
- emit_insn (gen_allocate_stack_di (operands[0], operands[1]));
- else
- emit_insn (gen_allocate_stack_si (operands[0], operands[1]));
- DONE;
})
-(define_insn "allocate_stack_<mode>"
- [(set (match_operand:P 0 "nvptx_register_operand" "=R")
- (unspec:P [(match_operand:P 1 "nvptx_register_operand" "R")]
- UNSPEC_ALLOCA))]
- ""
- "%.\\tcall (%0), %%alloca, (%1);")
+(define_insn "set_softstack_insn"
+ [(unspec [(match_operand 0 "nvptx_register_operand" "R")] UNSPEC_ALLOCA)]
+ "TARGET_SOFT_STACK"
+{
+ return (cfun->machine->using_softstack
+ ? "%.\\tst.shared%t0\\t[%%fstmp2], %0;"
+ : "");
+})
(define_expand "restore_stack_block"
[(match_operand 0 "register_operand" "")
(match_operand 1 "register_operand" "")]
""
{
+ if (TARGET_SOFT_STACK)
+ {
+ emit_move_insn (operands[0], operands[1]);
+ emit_insn (gen_set_softstack_insn (operands[0]));
+ }
DONE;
})
--
1.8.3.1