2005-05-03 Jakub Jelinek PR middle-end/21265 * expr.h (enum block_op_methods): Add BLOCK_OP_TAILCALL. (clear_storage): Add argument. * expr.c (emit_block_move_via_libcall, clear_storage_via_libcall): Add tailcall argument, set CALL_EXPR_TAILCALL of the CALL_EXPR to tailcall. (emit_block_move): Handle BLOCK_OP_TAILCALL method. (clear_storage): Add method argument, handle BLOCK_OP_TAILCALL. (store_expr, store_constructor): Adjust callers. * builtins.c (expand_builtin_memcpy): Pass BLOCK_OP_TAILCALL to emit_block_move if CALL_EXPR_TAILCALL (exp). (expand_builtin_memmove): Add ORIG_EXP argument, copy CALL_EXPR_TAILCALL from ORIG_EXP to the new CALL_EXPR. (expand_builtin_bcopy): Replace ARGLIST and TYPE arguments with EXP. Pass EXP to expand_builtin_memmove. (expand_builtin_memset): Add ORIG_EXP argument, pass BLOCK_OP_TAILCALL to clear_storage if CALL_EXPR_TAILCALL (orig_exp). (expand_builtin_bzero): Replace ARGLIST argument with EXP. Pass EXP to expand_builtin_memset. (expand_builtin_strcmp): Copy CALL_EXPR_TAILCALL from EXP to the new CALL_EXPR. (expand_builtin_strncmp): Likewise. (expand_builtin_printf): Replace ARGLIST argument with EXP. Copy CALL_EXPR_TAILCALL from EXP to the new CALL_EXPR. (expand_builtin_fprintf): Likewise. (expand_builtin): Adjust calls to expand_builtin_{memmove,bcopy,memset,bzero,{,f}printf}. * gcc.dg/20050503-1.c: New test. --- gcc/expr.h.jj 2005-04-25 11:55:59.000000000 +0200 +++ gcc/expr.h 2005-05-02 23:42:33.000000000 +0200 @@ -367,7 +367,9 @@ enum block_op_methods { BLOCK_OP_NORMAL, BLOCK_OP_NO_LIBCALL, - BLOCK_OP_CALL_PARM + BLOCK_OP_CALL_PARM, + /* Like BLOCK_OP_NORMAL, but the libcall can be tail call optimized. */ + BLOCK_OP_TAILCALL }; extern void init_block_move_fn (const char *); @@ -419,7 +421,7 @@ extern void use_group_regs (rtx *, rtx); /* Write zeros through the storage of OBJECT. If OBJECT has BLKmode, SIZE is its length in bytes. */ -extern rtx clear_storage (rtx, rtx); +extern rtx clear_storage (rtx, rtx, enum block_op_methods); /* Determine whether the LEN bytes can be moved by using several move instructions. Return nonzero if a call to move_by_pieces should --- gcc/expr.c.jj 2005-04-29 09:17:56.000000000 +0200 +++ gcc/expr.c 2005-05-02 23:42:33.000000000 +0200 @@ -126,7 +126,7 @@ static void move_by_pieces_1 (rtx (*) (r struct move_by_pieces *); static bool block_move_libcall_safe_for_call_parm (void); static bool emit_block_move_via_movmem (rtx, rtx, rtx, unsigned); -static rtx emit_block_move_via_libcall (rtx, rtx, rtx); +static rtx emit_block_move_via_libcall (rtx, rtx, rtx, bool); static tree emit_block_move_libcall_fn (int); static void emit_block_move_via_loop (rtx, rtx, rtx, unsigned); static rtx clear_by_pieces_1 (void *, HOST_WIDE_INT, enum machine_mode); @@ -135,7 +135,7 @@ static void store_by_pieces_1 (struct st static void store_by_pieces_2 (rtx (*) (rtx, ...), enum machine_mode, struct store_by_pieces *); static bool clear_storage_via_clrmem (rtx, rtx, unsigned); -static rtx clear_storage_via_libcall (rtx, rtx); +static rtx clear_storage_via_libcall (rtx, rtx, bool); static tree clear_storage_libcall_fn (int); static rtx compress_float_constant (rtx, rtx); static rtx get_subtarget (rtx); @@ -1148,6 +1148,7 @@ emit_block_move (rtx x, rtx y, rtx size, switch (method) { case BLOCK_OP_NORMAL: + case BLOCK_OP_TAILCALL: may_use_call = true; break; @@ -1196,7 +1197,8 @@ emit_block_move (rtx x, rtx y, rtx size, else if (emit_block_move_via_movmem (x, y, size, align)) ; else if (may_use_call) - retval = emit_block_move_via_libcall (x, y, size); + retval = emit_block_move_via_libcall (x, y, size, + method == BLOCK_OP_TAILCALL); else emit_block_move_via_loop (x, y, size, align); @@ -1325,7 +1327,7 @@ emit_block_move_via_movmem (rtx x, rtx y Return the return value from memcpy, 0 otherwise. */ static rtx -emit_block_move_via_libcall (rtx dst, rtx src, rtx size) +emit_block_move_via_libcall (rtx dst, rtx src, rtx size, bool tailcall) { rtx dst_addr, src_addr; tree call_expr, arg_list, fn, src_tree, dst_tree, size_tree; @@ -1367,6 +1369,7 @@ emit_block_move_via_libcall (rtx dst, rt call_expr = build1 (ADDR_EXPR, build_pointer_type (TREE_TYPE (fn)), fn); call_expr = build3 (CALL_EXPR, TREE_TYPE (TREE_TYPE (fn)), call_expr, arg_list, NULL_TREE); + CALL_EXPR_TAILCALL (call_expr) = tailcall; retval = expand_expr (call_expr, NULL_RTX, VOIDmode, 0); @@ -2427,11 +2430,13 @@ store_by_pieces_2 (rtx (*genfun) (rtx, . its length in bytes. */ rtx -clear_storage (rtx object, rtx size) +clear_storage (rtx object, rtx size, enum block_op_methods method) { enum machine_mode mode = GET_MODE (object); unsigned int align; + gcc_assert (method == BLOCK_OP_NORMAL || method == BLOCK_OP_TAILCALL); + /* If OBJECT is not BLKmode and SIZE is the same size as its mode, just move a zero. Otherwise, do this a piece at a time. */ if (mode != BLKmode @@ -2468,7 +2473,8 @@ clear_storage (rtx object, rtx size) else if (clear_storage_via_clrmem (object, size, align)) ; else - return clear_storage_via_libcall (object, size); + return clear_storage_via_libcall (object, size, + method == BLOCK_OP_TAILCALL); return NULL; } @@ -2533,7 +2539,7 @@ clear_storage_via_clrmem (rtx object, rt Return the return value of memset, 0 otherwise. */ static rtx -clear_storage_via_libcall (rtx object, rtx size) +clear_storage_via_libcall (rtx object, rtx size, bool tailcall) { tree call_expr, arg_list, fn, object_tree, size_tree; enum machine_mode size_mode; @@ -2566,6 +2572,7 @@ clear_storage_via_libcall (rtx object, r call_expr = build1 (ADDR_EXPR, build_pointer_type (TREE_TYPE (fn)), fn); call_expr = build3 (CALL_EXPR, TREE_TYPE (TREE_TYPE (fn)), call_expr, arg_list, NULL_TREE); + CALL_EXPR_TAILCALL (call_expr) = tailcall; retval = expand_expr (call_expr, NULL_RTX, VOIDmode, 0); @@ -4305,7 +4312,7 @@ store_expr (tree exp, rtx target, int ca } if (size != const0_rtx) - clear_storage (target, size); + clear_storage (target, size, BLOCK_OP_NORMAL); if (label) emit_label (label); @@ -4659,7 +4666,7 @@ store_constructor (tree exp, rtx target, && ! CONSTRUCTOR_ELTS (exp)) /* If the constructor is empty, clear the union. */ { - clear_storage (target, expr_size (exp)); + clear_storage (target, expr_size (exp), BLOCK_OP_NORMAL); cleared = 1; } @@ -4687,7 +4694,7 @@ store_constructor (tree exp, rtx target, || ((HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (target)) == size))) { - clear_storage (target, GEN_INT (size)); + clear_storage (target, GEN_INT (size), BLOCK_OP_NORMAL); cleared = 1; } @@ -4887,7 +4894,7 @@ store_constructor (tree exp, rtx target, if (REG_P (target)) emit_move_insn (target, CONST0_RTX (GET_MODE (target))); else - clear_storage (target, GEN_INT (size)); + clear_storage (target, GEN_INT (size), BLOCK_OP_NORMAL); cleared = 1; } @@ -5132,7 +5139,7 @@ store_constructor (tree exp, rtx target, if (REG_P (target)) emit_move_insn (target, CONST0_RTX (GET_MODE (target))); else - clear_storage (target, GEN_INT (size)); + clear_storage (target, GEN_INT (size), BLOCK_OP_NORMAL); cleared = 1; } --- gcc/builtins.c.jj 2005-05-02 09:51:51.000000000 +0200 +++ gcc/builtins.c 2005-05-03 13:33:40.000000000 +0200 @@ -111,15 +111,15 @@ static rtx expand_builtin_strspn (tree, static rtx expand_builtin_strcspn (tree, rtx, enum machine_mode); static rtx expand_builtin_memcpy (tree, rtx, enum machine_mode); static rtx expand_builtin_mempcpy (tree, tree, rtx, enum machine_mode, int); -static rtx expand_builtin_memmove (tree, tree, rtx, enum machine_mode); -static rtx expand_builtin_bcopy (tree, tree); +static rtx expand_builtin_memmove (tree, tree, rtx, enum machine_mode, tree); +static rtx expand_builtin_bcopy (tree); static rtx expand_builtin_strcpy (tree, rtx, enum machine_mode); static rtx expand_builtin_stpcpy (tree, rtx, enum machine_mode); static rtx builtin_strncpy_read_str (void *, HOST_WIDE_INT, enum machine_mode); static rtx expand_builtin_strncpy (tree, rtx, enum machine_mode); static rtx builtin_memset_read_str (void *, HOST_WIDE_INT, enum machine_mode); static rtx builtin_memset_gen_str (void *, HOST_WIDE_INT, enum machine_mode); -static rtx expand_builtin_memset (tree, rtx, enum machine_mode); +static rtx expand_builtin_memset (tree, rtx, enum machine_mode, tree); static rtx expand_builtin_bzero (tree); static rtx expand_builtin_strlen (tree, rtx, enum machine_mode); static rtx expand_builtin_strstr (tree, tree, rtx, enum machine_mode); @@ -2819,7 +2819,8 @@ expand_builtin_memcpy (tree exp, rtx tar /* Copy word part most expediently. */ dest_addr = emit_block_move (dest_mem, src_mem, len_rtx, - BLOCK_OP_NORMAL); + CALL_EXPR_TAILCALL (exp) + ? BLOCK_OP_TAILCALL : BLOCK_OP_NORMAL); if (dest_addr == 0) { @@ -2926,7 +2927,7 @@ expand_builtin_mempcpy (tree arglist, tr static rtx expand_builtin_memmove (tree arglist, tree type, rtx target, - enum machine_mode mode) + enum machine_mode mode, tree orig_exp) { if (!validate_arglist (arglist, POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE)) @@ -2958,11 +2959,13 @@ expand_builtin_memmove (tree arglist, tr normal memcpy. */ if (readonly_data_expr (src)) { - tree const fn = implicit_built_in_decls[BUILT_IN_MEMCPY]; + tree fn = implicit_built_in_decls[BUILT_IN_MEMCPY]; if (!fn) return 0; - return expand_expr (build_function_call_expr (fn, arglist), - target, mode, EXPAND_NORMAL); + fn = build_function_call_expr (fn, arglist); + if (TREE_CODE (fn) == CALL_EXPR) + CALL_EXPR_TAILCALL (fn) = CALL_EXPR_TAILCALL (orig_exp); + return expand_expr (fn, target, mode, EXPAND_NORMAL); } /* If length is 1 and we can expand memcpy call inline, @@ -2984,8 +2987,10 @@ expand_builtin_memmove (tree arglist, tr if we failed the caller should emit a normal call. */ static rtx -expand_builtin_bcopy (tree arglist, tree type) +expand_builtin_bcopy (tree exp) { + tree arglist = TREE_OPERAND (exp, 1); + tree type = TREE_TYPE (exp); tree src, dest, size, newarglist; if (!validate_arglist (arglist, @@ -3005,7 +3010,7 @@ expand_builtin_bcopy (tree arglist, tree newarglist = tree_cons (NULL_TREE, src, newarglist); newarglist = tree_cons (NULL_TREE, dest, newarglist); - return expand_builtin_memmove (newarglist, type, const0_rtx, VOIDmode); + return expand_builtin_memmove (newarglist, type, const0_rtx, VOIDmode, exp); } #ifndef HAVE_movstr @@ -3299,7 +3304,8 @@ builtin_memset_gen_str (void *data, HOST convenient). */ static rtx -expand_builtin_memset (tree arglist, rtx target, enum machine_mode mode) +expand_builtin_memset (tree arglist, rtx target, enum machine_mode mode, + tree orig_exp) { if (!validate_arglist (arglist, POINTER_TYPE, INTEGER_TYPE, INTEGER_TYPE, VOID_TYPE)) @@ -3385,7 +3391,9 @@ expand_builtin_memset (tree arglist, rtx dest_mem = get_memory_rtx (dest); set_mem_align (dest_mem, dest_align); - dest_addr = clear_storage (dest_mem, len_rtx); + dest_addr = clear_storage (dest_mem, len_rtx, + CALL_EXPR_TAILCALL (orig_exp) + ? BLOCK_OP_TAILCALL : BLOCK_OP_NORMAL); if (dest_addr == 0) { @@ -3401,8 +3409,9 @@ expand_builtin_memset (tree arglist, rtx if we failed the caller should emit a normal call. */ static rtx -expand_builtin_bzero (tree arglist) +expand_builtin_bzero (tree exp) { + tree arglist = TREE_OPERAND (exp, 1); tree dest, size, newarglist; if (!validate_arglist (arglist, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE)) @@ -3420,7 +3429,7 @@ expand_builtin_bzero (tree arglist) newarglist = tree_cons (NULL_TREE, integer_zero_node, newarglist); newarglist = tree_cons (NULL_TREE, dest, newarglist); - return expand_builtin_memset (newarglist, const0_rtx, VOIDmode); + return expand_builtin_memset (newarglist, const0_rtx, VOIDmode, exp); } /* Expand expression EXP, which is a call to the memcmp built-in function. @@ -3559,7 +3568,7 @@ expand_builtin_strcmp (tree exp, rtx tar tree len, len1, len2; rtx arg1_rtx, arg2_rtx, arg3_rtx; rtx result, insn; - tree fndecl; + tree fndecl, fn; int arg1_align = get_pointer_alignment (arg1, BIGGEST_ALIGNMENT) / BITS_PER_UNIT; @@ -3643,8 +3652,10 @@ expand_builtin_strcmp (tree exp, rtx tar arglist = build_tree_list (NULL_TREE, arg2); arglist = tree_cons (NULL_TREE, arg1, arglist); fndecl = get_callee_fndecl (exp); - exp = build_function_call_expr (fndecl, arglist); - return expand_call (exp, target, target == const0_rtx); + fn = build_function_call_expr (fndecl, arglist); + if (TREE_CODE (fn) == CALL_EXPR) + CALL_EXPR_TAILCALL (fn) = CALL_EXPR_TAILCALL (exp); + return expand_call (fn, target, target == const0_rtx); } #endif return 0; @@ -3681,7 +3692,7 @@ expand_builtin_strncmp (tree exp, rtx ta tree len, len1, len2; rtx arg1_rtx, arg2_rtx, arg3_rtx; rtx result, insn; - tree fndecl; + tree fndecl, fn; int arg1_align = get_pointer_alignment (arg1, BIGGEST_ALIGNMENT) / BITS_PER_UNIT; @@ -3771,8 +3782,10 @@ expand_builtin_strncmp (tree exp, rtx ta arglist = tree_cons (NULL_TREE, arg2, arglist); arglist = tree_cons (NULL_TREE, arg1, arglist); fndecl = get_callee_fndecl (exp); - exp = build_function_call_expr (fndecl, arglist); - return expand_call (exp, target, target == const0_rtx); + fn = build_function_call_expr (fndecl, arglist); + if (TREE_CODE (fn) == CALL_EXPR) + CALL_EXPR_TAILCALL (fn) = CALL_EXPR_TAILCALL (exp); + return expand_call (fn, target, target == const0_rtx); } #endif return 0; @@ -4704,15 +4717,16 @@ build_string_literal (int len, const cha return t; } -/* Expand a call to printf or printf_unlocked with argument list ARGLIST. +/* Expand EXP, a call to printf or printf_unlocked. Return 0 if a normal call should be emitted rather than transforming the function inline. If convenient, the result should be placed in TARGET with mode MODE. UNLOCKED indicates this is a printf_unlocked call. */ static rtx -expand_builtin_printf (tree arglist, rtx target, enum machine_mode mode, +expand_builtin_printf (tree exp, rtx target, enum machine_mode mode, bool unlocked) { + tree arglist = TREE_OPERAND (exp, 1); tree fn_putchar = unlocked ? implicit_built_in_decls[BUILT_IN_PUTCHAR_UNLOCKED] : implicit_built_in_decls[BUILT_IN_PUTCHAR]; @@ -4803,19 +4817,22 @@ expand_builtin_printf (tree arglist, rtx if (!fn) return 0; - return expand_expr (build_function_call_expr (fn, arglist), - target, mode, EXPAND_NORMAL); + fn = build_function_call_expr (fn, arglist); + if (TREE_CODE (fn) == CALL_EXPR) + CALL_EXPR_TAILCALL (fn) = CALL_EXPR_TAILCALL (exp); + return expand_expr (fn, target, mode, EXPAND_NORMAL); } -/* Expand a call to fprintf or fprintf_unlocked with argument list ARGLIST. +/* Expand EXP, a call to fprintf or fprintf_unlocked. Return 0 if a normal call should be emitted rather than transforming the function inline. If convenient, the result should be placed in TARGET with mode MODE. UNLOCKED indicates this is a fprintf_unlocked call. */ static rtx -expand_builtin_fprintf (tree arglist, rtx target, enum machine_mode mode, +expand_builtin_fprintf (tree exp, rtx target, enum machine_mode mode, bool unlocked) { + tree arglist = TREE_OPERAND (exp, 1); tree fn_fputc = unlocked ? implicit_built_in_decls[BUILT_IN_FPUTC_UNLOCKED] : implicit_built_in_decls[BUILT_IN_FPUTC]; tree fn_fputs = unlocked ? implicit_built_in_decls[BUILT_IN_FPUTS_UNLOCKED] @@ -4897,8 +4914,10 @@ expand_builtin_fprintf (tree arglist, rt if (!fn) return 0; - return expand_expr (build_function_call_expr (fn, arglist), - target, mode, EXPAND_NORMAL); + fn = build_function_call_expr (fn, arglist); + if (TREE_CODE (fn) == CALL_EXPR) + CALL_EXPR_TAILCALL (fn) = CALL_EXPR_TAILCALL (exp); + return expand_expr (fn, target, mode, EXPAND_NORMAL); } /* Expand a call to sprintf with argument list ARGLIST. Return 0 if @@ -5861,25 +5880,26 @@ expand_builtin (tree exp, rtx target, rt break; case BUILT_IN_MEMMOVE: - target = expand_builtin_memmove (arglist, TREE_TYPE (exp), target, mode); + target = expand_builtin_memmove (arglist, TREE_TYPE (exp), target, + mode, exp); if (target) return target; break; case BUILT_IN_BCOPY: - target = expand_builtin_bcopy (arglist, TREE_TYPE (exp)); + target = expand_builtin_bcopy (exp); if (target) return target; break; case BUILT_IN_MEMSET: - target = expand_builtin_memset (arglist, target, mode); + target = expand_builtin_memset (arglist, target, mode, exp); if (target) return target; break; case BUILT_IN_BZERO: - target = expand_builtin_bzero (arglist); + target = expand_builtin_bzero (exp); if (target) return target; break; @@ -5956,13 +5976,13 @@ expand_builtin (tree exp, rtx target, rt return const0_rtx; case BUILT_IN_PRINTF: - target = expand_builtin_printf (arglist, target, mode, false); + target = expand_builtin_printf (exp, target, mode, false); if (target) return target; break; case BUILT_IN_PRINTF_UNLOCKED: - target = expand_builtin_printf (arglist, target, mode, true); + target = expand_builtin_printf (exp, target, mode, true); if (target) return target; break; @@ -5979,13 +5999,13 @@ expand_builtin (tree exp, rtx target, rt break; case BUILT_IN_FPRINTF: - target = expand_builtin_fprintf (arglist, target, mode, false); + target = expand_builtin_fprintf (exp, target, mode, false); if (target) return target; break; case BUILT_IN_FPRINTF_UNLOCKED: - target = expand_builtin_fprintf (arglist, target, mode, true); + target = expand_builtin_fprintf (exp, target, mode, true); if (target) return target; break; --- gcc/testsuite/gcc.dg/20050503-1.c.jj 2005-05-03 12:25:49.000000000 +0200 +++ gcc/testsuite/gcc.dg/20050503-1.c 2005-05-03 13:23:18.000000000 +0200 @@ -0,0 +1,49 @@ +/* PR middle-end/21265 + Test whether tail call information is propagated through builtin + expanders. */ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +typedef __SIZE_TYPE__ size_t; + +#define F(n, rettype, name, args, callargs) \ +extern rettype name args; \ +rettype test##n args \ +{ \ + return name callargs; \ +} +#define F1(n, rettype, name, t1, callargs) \ + F (n, rettype, name, (t1 a1), callargs) +#define F2(n, rettype, name, t1, t2, callargs) \ + F (n, rettype, name, (t1 a1, t2 a2), callargs) +#define F3(n, rettype, name, t1, t2, t3, callargs) \ + F (n, rettype, name, (t1 a1, t2 a2, t3 a3), callargs) + +F3 (1a, void *, memcpy, void *, const void *, size_t, (a1, a2, a3)) +F3 (1b, void *, memcpy, void *, const void *, size_t, (a1, a2, 10)) +F3 (2a, void *, mempcpy, void *, const void *, size_t, (a1, a2, a3)) +F3 (2b, void *, mempcpy, void *, const void *, size_t, (a1, a2, 10)) +F3 (3a, void *, memmove, void *, const void *, size_t, (a1, a2, a3)) +F3 (3b, void *, memmove, void *, const void *, size_t, (a1, "abcdefghijklmno", a3)) +F3 (4a, void *, memset, void *, int, size_t, (a1, a2, a3)) +F3 (4b, void *, memset, void *, int, size_t, (a1, a2, 156)) +F3 (4c, void *, memset, void *, int, size_t, (a1, 0, a3)) +F3 (4d, void *, memset, void *, int, size_t, (a1, 0, 10000)) +F3 (5a, int, memcmp, const void *, const void *, size_t, (a1, a2, a3)) +F3 (5b, int, memcmp, const void *, const void *, size_t, (a1, "abcdefghijkl", a3)) +F2 (6, char *, strcpy, char *, const char *, (a1, a2)) +F2 (7, char *, stpcpy, char *, const char *, (a1, a2)) +F3 (8, char *, strncpy, char *, const char *, size_t, (a1, a2, a3)) +F3 (9, char *, stpncpy, char *, const char *, size_t, (a1, a2, a3)) +F2 (10, char *, strcat, char *, const char *, (a1, a2)) +F3 (11, char *, strncat, char *, const char *, size_t, (a1, a2, a3)) +F1 (12a, size_t, strlen, const char *, (a1)) +F1 (12b, size_t, strlen, const char *, ("foobar")) +F2 (13a, int, strcmp, const char *, const char *, (a1, a2)) +F2 (13b, int, strcmp, const char *, const char *, (a1, "abcdefghijklm")) +F3 (14a, int, strncmp, const char *, const char *, size_t, (a1, a2, a3)) +F3 (14b, int, strncmp, const char *, const char *, size_t, (a1, "abcdefghijklm", 10)) +F2 (15, char *, strchr, const char *, int, (a1, a2)) + +/* All the calls above should be tail call optimized on i?86/x86-64. */ +/* { dg-final { scan-assembler-not "call" { target i?86-*-linux* x86_64-*-linux* } } } */