This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: Implement stack arrays even for unknown sizes
Dear Michael,
Thanks for updating the patch. I am afraid that my attention to
gfortran is somewhat limited at present. However, I see that
Dominique has verified your patch and that all is well.
The resulting speed-up for nf.f90 is rather remarkable. What specific
feature of the Fortran code leads to the 30s => 15s improvement?
Cheers
Paul
On Mon, Apr 11, 2011 at 6:04 PM, Michael Matz <matz@suse.de> wrote:
> On Sat, 9 Apr 2011, Paul Richard Thomas wrote:
>
>> I find that both nf.f90 and capacita.f90 segfault in runtime for any
>> stack size.
>
> Try this patch.  I've verified that capacita and nf work with it and
> -march=native -ffast-math -funroll-loops -fstack-arrays -O3.  In fact all
> of polyhedron works for me on these flags.  (I've set a ulimit -s of
> 512MB, but I don't know if such a large amount is required).
>
>
> Ciao,
> Michael.
>
>         * trans-array.c (toplevel): Include gimple.h.
>         (gfc_trans_allocate_array_storage): Check flag_stack_arrays,
>         properly expand variable length arrays.
>         (gfc_trans_auto_array_allocation): If flag_stack_arrays create
>         variable length decls and associate them with their scope.
>         * gfortran.h (gfc_option_t): Add flag_stack_arrays member.
>         * options.c (gfc_init_options): Initialize flag_stack_arrays.
>         (gfc_handle_option): Handle -fstack-arrays option.
>         * lang.opt (fstack-arrays): Add option.
>         * invoke.texi (Code Gen Options): Document it.
>         * Make-lang.in (trans-array.o): Depend on GIMPLE_H.
>
> Index: trans-array.c
> ===================================================================
> *** trans-array.c ? ? ? (revision 172206)
> --- trans-array.c ? ? ? (working copy)
> *************** along with GCC; see the file COPYING3.
> *** 81,86 ****
> --- 81,87 ----
> ?#include "system.h"
> ?#include "coretypes.h"
> ?#include "tree.h"
> + #include "gimple.h"
> ?#include "diagnostic-core.h" ?/* For internal_error/fatal_error. ?*/
> ?#include "flags.h"
> ?#include "gfortran.h"
> *************** gfc_trans_allocate_array_storage (stmtbl
> *** 630,647 ****
> ? ? ?{
> ? ? ? ?/* Allocate the temporary. ?*/
> ? ? ? ?onstack = !dynamic && initial == NULL_TREE
> ! ? ? ? ? ? ? ? ? ? ? ? ?&& gfc_can_put_var_on_stack (size);
>
> ? ? ? ?if (onstack)
> ? ? ? ?{
> ? ? ? ? ?/* Make a temporary variable to hold the data. ?*/
> ? ? ? ? ?tmp = fold_build2_loc (input_location, MINUS_EXPR, TREE_TYPE (nelem),
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? nelem, gfc_index_one_node);
> ? ? ? ? ?tmp = build_range_type (gfc_array_index_type, gfc_index_zero_node,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?tmp);
> ? ? ? ? ?tmp = build_array_type (gfc_get_element_type (TREE_TYPE (desc)),
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?tmp);
> ? ? ? ? ?tmp = gfc_create_var (tmp, "A");
> ? ? ? ? ?tmp = gfc_build_addr_expr (NULL_TREE, tmp);
> ? ? ? ? ?gfc_conv_descriptor_data_set (pre, desc, tmp);
> ? ? ? ?}
> --- 631,654 ----
> ? ? ?{
> ? ? ? ?/* Allocate the temporary. ?*/
> ? ? ? ?onstack = !dynamic && initial == NULL_TREE
> ! ? ? ? ? ? ? ? ? ? ? ? ?&& (gfc_option.flag_stack_arrays
> ! ? ? ? ? ? ? ? ? ? ? ? ? ? ?|| gfc_can_put_var_on_stack (size));
>
> ? ? ? ?if (onstack)
> ? ? ? ?{
> ? ? ? ? ?/* Make a temporary variable to hold the data. ?*/
> ? ? ? ? ?tmp = fold_build2_loc (input_location, MINUS_EXPR, TREE_TYPE (nelem),
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? nelem, gfc_index_one_node);
> + ? ? ? ? tmp = gfc_evaluate_now (tmp, pre);
> ? ? ? ? ?tmp = build_range_type (gfc_array_index_type, gfc_index_zero_node,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?tmp);
> ? ? ? ? ?tmp = build_array_type (gfc_get_element_type (TREE_TYPE (desc)),
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?tmp);
> ? ? ? ? ?tmp = gfc_create_var (tmp, "A");
> + ? ? ? ? gfc_add_expr_to_block (pre,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?fold_build1_loc (input_location,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? DECL_EXPR, TREE_TYPE (tmp),
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? tmp));
> ? ? ? ? ?tmp = gfc_build_addr_expr (NULL_TREE, tmp);
> ? ? ? ? ?gfc_conv_descriptor_data_set (pre, desc, tmp);
> ? ? ? ?}
> *************** gfc_trans_auto_array_allocation (tree de
> *** 4744,4749 ****
> --- 4751,4758 ----
> ? ?tree tmp;
> ? ?tree size;
> ? ?tree offset;
> + ? tree space;
> + ? tree inittree;
> ? ?bool onstack;
>
> ? ?gcc_assert (!(sym->attr.pointer || sym->attr.allocatable));
> *************** gfc_trans_auto_array_allocation (tree de
> *** 4800,4814 ****
> ? ? ? ?return;
> ? ? ?}
>
> ! ? /* The size is the number of elements in the array, so multiply by the
> ! ? ? ?size of an element to get the total size. ?*/
> ! ? tmp = TYPE_SIZE_UNIT (gfc_get_element_type (type));
> ! ? size = fold_build2_loc (input_location, MULT_EXPR, gfc_array_index_type,
> ! ? ? ? ? ? ? ? ? ? ? ? ? size, fold_convert (gfc_array_index_type, tmp));
>
> ! ? /* Allocate memory to hold the data. ?*/
> ! ? tmp = gfc_call_malloc (&init, TREE_TYPE (decl), size);
> ! ? gfc_add_modify (&init, decl, tmp);
>
> ? ?/* Set offset of the array. ?*/
> ? ?if (TREE_CODE (GFC_TYPE_ARRAY_OFFSET (type)) == VAR_DECL)
> --- 4809,4838 ----
> ? ? ? ?return;
> ? ? ?}
>
> ! ? if (gfc_option.flag_stack_arrays)
> ! ? ? {
> ! ? ? ? gcc_assert (TREE_CODE (TREE_TYPE (decl)) == POINTER_TYPE);
> ! ? ? ? space = build_decl (sym->declared_at.lb->location,
> ! ? ? ? ? ? ? ? ? ? ? ? ? VAR_DECL, create_tmp_var_name ("A"),
> ! ? ? ? ? ? ? ? ? ? ? ? ? TREE_TYPE (TREE_TYPE (decl)));
> ! ? ? ? gfc_trans_vla_type_sizes (sym, &init);
> ! ? ? }
> ! ? else
> ! ? ? {
> ! ? ? ? /* The size is the number of elements in the array, so multiply by the
> ! ? ? ? ?size of an element to get the total size. ?*/
> ! ? ? ? tmp = TYPE_SIZE_UNIT (gfc_get_element_type (type));
> ! ? ? ? size = fold_build2_loc (input_location, MULT_EXPR, gfc_array_index_type,
> ! ? ? ? ? ? ? ? ? ? ? ? ? ? ? size, fold_convert (gfc_array_index_type, tmp));
>
> ! ? ? ? /* Allocate memory to hold the data. ?*/
> ! ? ? ? tmp = gfc_call_malloc (&init, TREE_TYPE (decl), size);
> ! ? ? ? gfc_add_modify (&init, decl, tmp);
> !
> ! ? ? ? /* Free the temporary. ?*/
> ! ? ? ? tmp = gfc_call_free (convert (pvoid_type_node, decl));
> ! ? ? ? space = NULL_TREE;
> ! ? ? }
>
> ? ?/* Set offset of the array. ?*/
> ? ?if (TREE_CODE (GFC_TYPE_ARRAY_OFFSET (type)) == VAR_DECL)
> *************** gfc_trans_auto_array_allocation (tree de
> *** 4817,4826 ****
> ? ?/* Automatic arrays should not have initializers. ?*/
> ? ?gcc_assert (!sym->value);
>
> ! ? /* Free the temporary. ?*/
> ! ? tmp = gfc_call_free (convert (pvoid_type_node, decl));
>
> ! ? gfc_add_init_cleanup (block, gfc_finish_block (&init), tmp);
> ?}
>
>
> --- 4841,4866 ----
> ? ?/* Automatic arrays should not have initializers. ?*/
> ? ?gcc_assert (!sym->value);
>
> ! ? inittree = gfc_finish_block (&init);
> !
> ! ? if (space)
> ! ? ? {
> ! ? ? ? tree addr;
> ! ? ? ? pushdecl (space);
>
> ! ? ? ? /* Don't create new scope, emit the DECL_EXPR in exactly the scope
> ! ? ? ? ? ?where also space is located. ?*/
> ! ? ? ? gfc_init_block (&init);
> ! ? ? ? tmp = fold_build1_loc (input_location, DECL_EXPR,
> ! ? ? ? ? ? ? ? ? ? ? ? ? ? ?TREE_TYPE (space), space);
> ! ? ? ? gfc_add_expr_to_block (&init, tmp);
> ! ? ? ? addr = fold_build1_loc (sym->declared_at.lb->location,
> ! ? ? ? ? ? ? ? ? ? ? ? ? ? ? ADDR_EXPR, TREE_TYPE (decl), space);
> ! ? ? ? gfc_add_modify (&init, decl, addr);
> ! ? ? ? gfc_add_init_cleanup (block, gfc_finish_block (&init), NULL_TREE);
> ! ? ? ? tmp = NULL_TREE;
> ! ? ? }
> ! ? gfc_add_init_cleanup (block, inittree, tmp);
> ?}
>
>
> Index: Make-lang.in
> ===================================================================
> *** Make-lang.in ? ? ? ?(revision 172206)
> --- Make-lang.in ? ? ? ?(working copy)
> *************** fortran/trans-stmt.o: $(GFORTRAN_TRANS_D
> *** 353,359 ****
> ?fortran/trans-openmp.o: $(GFORTRAN_TRANS_DEPS)
> ?fortran/trans-io.o: $(GFORTRAN_TRANS_DEPS) gt-fortran-trans-io.h \
> ? ?fortran/ioparm.def
> ! fortran/trans-array.o: $(GFORTRAN_TRANS_DEPS)
> ?fortran/trans-intrinsic.o: $(GFORTRAN_TRANS_DEPS) fortran/mathbuiltins.def \
> ? ?gt-fortran-trans-intrinsic.h
> ?fortran/dependency.o: $(GFORTRAN_TRANS_DEPS) fortran/dependency.h
> --- 353,359 ----
> ?fortran/trans-openmp.o: $(GFORTRAN_TRANS_DEPS)
> ?fortran/trans-io.o: $(GFORTRAN_TRANS_DEPS) gt-fortran-trans-io.h \
> ? ?fortran/ioparm.def
> ! fortran/trans-array.o: $(GFORTRAN_TRANS_DEPS) $(GIMPLE_H)
> ?fortran/trans-intrinsic.o: $(GFORTRAN_TRANS_DEPS) fortran/mathbuiltins.def \
> ? ?gt-fortran-trans-intrinsic.h
> ?fortran/dependency.o: $(GFORTRAN_TRANS_DEPS) fortran/dependency.h
> Index: gfortran.h
> ===================================================================
> *** gfortran.h ?(revision 172206)
> --- gfortran.h ?(working copy)
> *************** typedef struct
> *** 2220,2225 ****
> --- 2220,2226 ----
> ? ?int flag_d_lines;
> ? ?int gfc_flag_openmp;
> ? ?int flag_sign_zero;
> + ? int flag_stack_arrays;
> ? ?int flag_module_private;
> ? ?int flag_recursive;
> ? ?int flag_init_local_zero;
> Index: lang.opt
> ===================================================================
> *** lang.opt ? ?(revision 172206)
> --- lang.opt ? ?(working copy)
> *************** fmax-stack-var-size=
> *** 454,459 ****
> --- 454,463 ----
> ?Fortran RejectNegative Joined UInteger
> ?-fmax-stack-var-size=<n> ? ? ?Size in bytes of the largest array that will be put on the stack
>
> + fstack-arrays
> + Fortran
> + Put all local arrays on stack.
> +
> ?fmodule-private
> ?Fortran
> ?Set default accessibility of module entities to PRIVATE.
> Index: invoke.texi
> ===================================================================
> *** invoke.texi (revision 172206)
> --- invoke.texi (working copy)
> *************** and warnings}.
> *** 167,172 ****
> --- 167,173 ----
> ?-fbounds-check -fcheck-array-temporaries ?-fmax-array-constructor =@var{n} @gol
> ?-fcheck=@var{<all|array-temps|bounds|do|mem|pointer|recursion>} @gol
> ?-fcoarray=@var{<none|single|lib>} -fmax-stack-var-size=@var{n} @gol
> + -fstack-arrays @gol
> ?-fpack-derived ?-frepack-arrays ?-fshort-enums ?-fexternal-blas @gol
> ?-fblas-matmul-limit=@var{n} -frecursive -finit-local-zero @gol
> ?-finit-integer=@var{n} -finit-real=@var{<zero|inf|-inf|nan|snan>} @gol
> *************** Future versions of GNU Fortran may impro
> *** 1361,1366 ****
> --- 1362,1374 ----
>
> ?The default value for @var{n} is 32768.
>
> + @item -fstack-arrays
> + @opindex @code{fstack-arrays}
> + Adding this option will make the Fortran compiler put all local arrays,
> + even those of unknown size, onto stack memory.  If your program uses very
> + large local arrays it's possible that you'll have to extend your runtime
> + limits for stack memory on some operating systems.
> +
> ?@item -fpack-derived
> ?@opindex @code{fpack-derived}
> ?@cindex structure packing
> Index: options.c
> ===================================================================
> *** options.c ? (revision 172206)
> --- options.c ? (working copy)
> *************** gfc_init_options (unsigned int decoded_o
> *** 123,128 ****
> --- 123,129 ----
>
> ? ?/* Default value of flag_max_stack_var_size is set in gfc_post_options. ?*/
> ? ?gfc_option.flag_max_stack_var_size = -2;
> + ? gfc_option.flag_stack_arrays = 0;
>
> ? ?gfc_option.flag_range_check = 1;
> ? ?gfc_option.flag_pack_derived = 0;
> *************** gfc_handle_option (size_t scode, const c
> *** 783,788 ****
> --- 784,793 ----
> ? ? ? ?gfc_option.flag_max_stack_var_size = value;
> ? ? ? ?break;
>
> + ? ? case OPT_fstack_arrays:
> + ? ? ? gfc_option.flag_stack_arrays = value;
> + ? ? ? break;
> +
> ? ? ?case OPT_fmodule_private:
> ? ? ? ?gfc_option.flag_module_private = value;
> ? ? ? ?break;
>
--
The knack of flying is learning how to throw yourself at the ground and miss.
        --The Hitchhiker's Guide to the Galaxy