This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: nvptx offloading patches [3/n], RFD
- From: Richard Biener <richard dot guenther at gmail dot com>
- To: Jakub Jelinek <jakub at redhat dot com>
- Cc: Jan Hubicka <hubicka at ucw dot cz>, Ilya Verbin <iverbin at gmail dot com>, Bernd Schmidt <bernds at codesourcery dot com>, Thomas Schwinge <thomas at codesourcery dot com>, GCC Patches <gcc-patches at gcc dot gnu dot org>
- Date: Tue, 17 Feb 2015 11:00:14 +0100
- Subject: Re: nvptx offloading patches [3/n], RFD
- Authentication-results: sourceware.org; auth=none
- References: <5454CAB9 dot 3040907 at codesourcery dot com> <20150204113817 dot GO1746 at tucnak dot redhat dot com> <CAFiYyc11FF2EeOQzqVmQu=q9-Uh-YCN0e38jUn-jmhD9gUrWsw at mail dot gmail dot com> <20150216210812 dot GO1746 at tucnak dot redhat dot com> <5388429A-DE44-4C08-8A19-D42B6E00C0A6 at gmail dot com> <20150216214349 dot GR1746 at tucnak dot redhat dot com>
On Mon, Feb 16, 2015 at 10:43 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Mon, Feb 16, 2015 at 10:35:30PM +0100, Richard Biener wrote:
>> Seeing the real format string you introduce I wonder if identifying modes
>> by their names wouldn't work in 99% of all cases (apart from PSImode
>> maybe).
>
> There are various corner cases. Plus of course sometimes insignificant, but
> sometimes very significant, floating mode changes. SFmode on one target
> might be completely different from another target.
But we can't deal with arbitrary target differences anyway - otherwise
we have generated wrong code already.
>> Also for most cases we can construct the machine mode from the type. Or
>> where that is not possible stream the extra info that is necessary
>> instead.
>
> I thought we've discussed that already on IRC. E.g. decimal modes are
> identified only by mode and nothing else, and it doesn't look like it
> can be easily derived from types in many cases (spent quite some time on
> that).
Sure, still modes and types have quite some overlap in information
so we might be able to do more compact streaming (and at the same
time not rely on the machine-mode enum). The machine-modes
of course are very compact to stream (they are basically a common set
of all possible types), and your mapping introduces kind of a cache
for common type properties.
I know that Honza wanted to make trees slimmer by taking into account
more (redundant) information from the modes associated with trees.
I'm just looking for a way to make this less of a hack (and the LTO IL
less target dependent). Not for GCC 5 for which something like your
patch is probably ok, but for the future.
>> Overall feels like a hack BTW :) can't we assign machine mode enum IDs in
>> a target independent way? I mean, it doesn't have to be densely
>> allocated?
>
> We iterate over modes, we have tons of tables indexed by modes, so if we
> introduce gaps, we'll make the compiler bigger and slower.
> If this is limited to the offloading path, like in the attached updated
> patch, the overhead for native LTO should not be measurable.
Sure.
Thanks,
Richard.
> --- gcc/passes.c.jj 2015-02-16 22:18:33.219702315 +0100
> +++ gcc/passes.c 2015-02-16 22:19:20.842917807 +0100
> @@ -2460,6 +2460,7 @@ ipa_write_summaries_1 (lto_symtab_encode
> struct lto_out_decl_state *state = lto_new_out_decl_state ();
> state->symtab_node_encoder = encoder;
>
> + lto_output_init_mode_table ();
> lto_push_out_decl_state (state);
>
> gcc_assert (!flag_wpa);
> @@ -2581,6 +2582,7 @@ ipa_write_optimization_summaries (lto_sy
> lto_symtab_encoder_iterator lsei;
> state->symtab_node_encoder = encoder;
>
> + lto_output_init_mode_table ();
> lto_push_out_decl_state (state);
> for (lsei = lsei_start_function_in_partition (encoder);
> !lsei_end_p (lsei); lsei_next_function_in_partition (&lsei))
> --- gcc/tree-streamer.h.jj 2015-02-16 22:18:33.222702266 +0100
> +++ gcc/tree-streamer.h 2015-02-16 22:19:20.843917791 +0100
> @@ -24,6 +24,7 @@ along with GCC; see the file COPYING3.
>
> #include "streamer-hooks.h"
> #include "lto-streamer.h"
> +#include "data-streamer.h"
> #include "hash-map.h"
>
> /* Cache of pickled nodes. Used to avoid writing the same node more
> @@ -91,6 +92,7 @@ void streamer_write_integer_cst (struct
> void streamer_write_builtin (struct output_block *, tree);
>
> /* In tree-streamer.c. */
> +extern unsigned char streamer_mode_table[1 << 8];
> void streamer_check_handled_ts_structures (void);
> bool streamer_tree_cache_insert (struct streamer_tree_cache_d *, tree,
> hashval_t, unsigned *);
> @@ -119,5 +121,19 @@ streamer_tree_cache_get_hash (struct str
> return cache->hashes[ix];
> }
>
> +static inline void
> +bp_pack_machine_mode (struct bitpack_d *bp, machine_mode mode)
> +{
> + streamer_mode_table[mode] = 1;
> + bp_pack_enum (bp, machine_mode, 1 << 8, mode);
> +}
> +
> +static inline machine_mode
> +bp_unpack_machine_mode (struct bitpack_d *bp)
> +{
> + return (machine_mode)
> + ((struct lto_input_block *)
> + bp->stream)->mode_table[bp_unpack_enum (bp, machine_mode, 1 << 8)];
> +}
>
> #endif /* GCC_TREE_STREAMER_H */
> --- gcc/lto-streamer-out.c.jj 2015-02-16 22:18:33.204702562 +0100
> +++ gcc/lto-streamer-out.c 2015-02-16 22:20:06.659163066 +0100
> @@ -2642,6 +2642,96 @@ produce_symtab (struct output_block *ob)
> }
>
>
> +/* Init the streamer_mode_table for output, where we collect info on what
> + machine_mode values have been streamed. */
> +void
> +lto_output_init_mode_table (void)
> +{
> + memset (streamer_mode_table, '\0', MAX_MACHINE_MODE);
> +}
> +
> +
> +/* Write the mode table. */
> +static void
> +lto_write_mode_table (void)
> +{
> + struct output_block *ob;
> + ob = create_output_block (LTO_section_mode_table);
> + bitpack_d bp = bitpack_create (ob->main_stream);
> +
> + /* Ensure that for GET_MODE_INNER (m) != VOIDmode we have
> + also the inner mode marked. */
> + for (int i = 0; i < (int) MAX_MACHINE_MODE; i++)
> + if (streamer_mode_table[i])
> + {
> + machine_mode m = (machine_mode) i;
> + if (GET_MODE_INNER (m) != VOIDmode)
> + streamer_mode_table[(int) GET_MODE_INNER (m)] = 1;
> + }
> + /* First stream modes that have GET_MODE_INNER (m) == VOIDmode,
> + so that we can refer to them afterwards. */
> + for (int pass = 0; pass < 2; pass++)
> + for (int i = 0; i < (int) MAX_MACHINE_MODE; i++)
> + if (streamer_mode_table[i] && i != (int) VOIDmode && i != (int) BLKmode)
> + {
> + machine_mode m = (machine_mode) i;
> + if ((GET_MODE_INNER (m) == VOIDmode) ^ (pass == 0))
> + continue;
> + bp_pack_value (&bp, m, 8);
> + bp_pack_enum (&bp, mode_class, MAX_MODE_CLASS, GET_MODE_CLASS (m));
> + bp_pack_value (&bp, GET_MODE_SIZE (m), 8);
> + bp_pack_value (&bp, GET_MODE_PRECISION (m), 16);
> + bp_pack_value (&bp, GET_MODE_INNER (m), 8);
> + bp_pack_value (&bp, GET_MODE_NUNITS (m), 8);
> + switch (GET_MODE_CLASS (m))
> + {
> + case MODE_FRACT:
> + case MODE_UFRACT:
> + case MODE_ACCUM:
> + case MODE_UACCUM:
> + bp_pack_value (&bp, GET_MODE_IBIT (m), 8);
> + bp_pack_value (&bp, GET_MODE_FBIT (m), 8);
> + break;
> + case MODE_FLOAT:
> + case MODE_DECIMAL_FLOAT:
> + bp_pack_string (ob, &bp, REAL_MODE_FORMAT (m)->name, true);
> + break;
> + default:
> + break;
> + }
> + bp_pack_string (ob, &bp, GET_MODE_NAME (m), true);
> + }
> + bp_pack_value (&bp, VOIDmode, 8);
> +
> + streamer_write_bitpack (&bp);
> +
> + char *section_name
> + = lto_get_section_name (LTO_section_mode_table, NULL, NULL);
> + lto_begin_section (section_name, !flag_wpa);
> + free (section_name);
> +
> + /* The entire header stream is computed here. */
> + struct lto_simple_header_with_strings header;
> + memset (&header, 0, sizeof (header));
> +
> + /* Write the header. */
> + header.major_version = LTO_major_version;
> + header.minor_version = LTO_minor_version;
> +
> + header.main_size = ob->main_stream->total_size;
> + header.string_size = ob->string_stream->total_size;
> + lto_write_data (&header, sizeof header);
> +
> + /* Put all of the gimple and the string table out the asm file as a
> + block of text. */
> + lto_write_stream (ob->main_stream);
> + lto_write_stream (ob->string_stream);
> +
> + lto_end_section ();
> + destroy_output_block (ob);
> +}
> +
> +
> /* This pass is run after all of the functions are serialized and all
> of the IPA passes have written their serialized forms. This pass
> causes the vector of all of the global decls and types used from
> @@ -2749,4 +2839,6 @@ produce_asm_for_decls (void)
> lto_symtab_encoder_delete (ob->decl_state->symtab_node_encoder);
> lto_function_decl_states.release ();
> destroy_output_block (ob);
> + if (lto_stream_offload_p)
> + lto_write_mode_table ();
> }
> --- gcc/config/pdp11/pdp11.c.jj 2015-02-16 22:18:33.209702480 +0100
> +++ gcc/config/pdp11/pdp11.c 2015-02-16 22:19:20.845917758 +0100
> @@ -107,7 +107,8 @@ const struct real_format pdp11_f_format
> false,
> false,
> false,
> - false
> + false,
> + "pdp11_f"
> };
>
> const struct real_format pdp11_d_format =
> @@ -128,7 +129,8 @@ const struct real_format pdp11_d_format
> false,
> false,
> false,
> - false
> + false,
> + "pdp11_d"
> };
>
> static void
> --- gcc/lto-section-in.c.jj 2015-02-16 22:18:33.202702595 +0100
> +++ gcc/lto-section-in.c 2015-02-16 22:19:20.845917758 +0100
> @@ -89,7 +89,8 @@ const char *lto_section_name[LTO_N_SECTI
> "inline",
> "ipcp_trans",
> "icf",
> - "offload_table"
> + "offload_table",
> + "mode_table"
> };
>
>
> @@ -262,7 +263,8 @@ lto_create_simple_input_block (struct lt
> return NULL;
>
> *datar = data;
> - return new lto_input_block (data + main_offset, header->main_size);
> + return new lto_input_block (data + main_offset, header->main_size,
> + file_data->mode_table);
> }
>
>
> --- gcc/tree-streamer-out.c.jj 2015-02-16 22:18:33.222702266 +0100
> +++ gcc/tree-streamer-out.c 2015-02-16 22:19:20.845917758 +0100
> @@ -190,7 +190,7 @@ static void
> pack_ts_fixed_cst_value_fields (struct bitpack_d *bp, tree expr)
> {
> struct fixed_value fv = TREE_FIXED_CST (expr);
> - bp_pack_enum (bp, machine_mode, MAX_MACHINE_MODE, fv.mode);
> + bp_pack_machine_mode (bp, fv.mode);
> bp_pack_var_len_int (bp, fv.data.low);
> bp_pack_var_len_int (bp, fv.data.high);
> }
> @@ -201,7 +201,7 @@ pack_ts_fixed_cst_value_fields (struct b
> static void
> pack_ts_decl_common_value_fields (struct bitpack_d *bp, tree expr)
> {
> - bp_pack_enum (bp, machine_mode, MAX_MACHINE_MODE, DECL_MODE (expr));
> + bp_pack_machine_mode (bp, DECL_MODE (expr));
> bp_pack_value (bp, DECL_NONLOCAL (expr), 1);
> bp_pack_value (bp, DECL_VIRTUAL_P (expr), 1);
> bp_pack_value (bp, DECL_IGNORED_P (expr), 1);
> @@ -325,7 +325,7 @@ pack_ts_function_decl_value_fields (stru
> static void
> pack_ts_type_common_value_fields (struct bitpack_d *bp, tree expr)
> {
> - bp_pack_enum (bp, machine_mode, MAX_MACHINE_MODE, TYPE_MODE (expr));
> + bp_pack_machine_mode (bp, TYPE_MODE (expr));
> bp_pack_value (bp, TYPE_STRING_FLAG (expr), 1);
> bp_pack_value (bp, TYPE_NO_FORCE_BLK (expr), 1);
> bp_pack_value (bp, TYPE_NEEDS_CONSTRUCTING (expr), 1);
> --- gcc/real.h.jj 2015-02-16 22:18:33.220702299 +0100
> +++ gcc/real.h 2015-02-16 22:19:20.846917741 +0100
> @@ -155,6 +155,7 @@ struct real_format
> bool has_signed_zero;
> bool qnan_msb_set;
> bool canonical_nan_lsbs_set;
> + const char *name;
> };
>
>
> --- gcc/lto-streamer.h.jj 2015-02-16 22:18:33.211702447 +0100
> +++ gcc/lto-streamer.h 2015-02-16 22:19:20.846917741 +0100
> @@ -248,6 +248,7 @@ enum lto_section_type
> LTO_section_ipcp_transform,
> LTO_section_ipa_icf,
> LTO_section_offload_table,
> + LTO_section_mode_table,
> LTO_N_SECTION_TYPES /* Must be last. */
> };
>
> @@ -312,12 +313,15 @@ class lto_input_block
> public:
> /* Special constructor for the string table, it abuses this to
> do random access but use the uhwi decoder. */
> - lto_input_block (const char *data_, unsigned int p_, unsigned int len_)
> - : data (data_), p (p_), len (len_) {}
> - lto_input_block (const char *data_, unsigned int len_)
> - : data (data_), p (0), len (len_) {}
> + lto_input_block (const char *data_, unsigned int p_, unsigned int len_,
> + const unsigned char *mode_table_)
> + : data (data_), mode_table (mode_table_), p (p_), len (len_) {}
> + lto_input_block (const char *data_, unsigned int len_,
> + const unsigned char *mode_table_)
> + : data (data_), mode_table (mode_table_), p (0), len (len_) {}
>
> const char *data;
> + const unsigned char *mode_table;
> unsigned int p;
> unsigned int len;
> };
> @@ -527,6 +531,9 @@ struct GTY(()) lto_file_decl_data
>
> /* Map assigning declarations their resolutions. */
> hash_map<tree, ld_plugin_symbol_resolution> * GTY((skip)) resolution_map;
> +
> + /* Mode translation table. */
> + const unsigned char *mode_table;
> };
>
> typedef struct lto_file_decl_data *lto_file_decl_data_ptr;
> @@ -775,6 +782,7 @@ extern void lto_input_variable_construct
> extern void lto_input_constructors_and_inits (struct lto_file_decl_data *,
> const char *);
> extern void lto_input_toplevel_asms (struct lto_file_decl_data *, int);
> +extern void lto_input_mode_table (struct lto_file_decl_data *);
> extern struct data_in *lto_data_in_create (struct lto_file_decl_data *,
> const char *, unsigned,
> vec<ld_plugin_symbol_resolution_t> );
> @@ -807,6 +815,7 @@ void lto_output_decl_state_refs (struct
> struct lto_output_stream *,
> struct lto_out_decl_state *);
> void lto_output_location (struct output_block *, struct bitpack_d *, location_t);
> +void lto_output_init_mode_table (void);
>
>
> /* In lto-cgraph.c */
> --- gcc/ipa-prop.c.jj 2015-02-16 22:18:33.219702315 +0100
> +++ gcc/ipa-prop.c 2015-02-16 22:19:20.848917709 +0100
> @@ -4868,7 +4868,7 @@ ipa_prop_read_section (struct lto_file_d
> unsigned int count;
>
> lto_input_block ib_main ((const char *) data + main_offset,
> - header->main_size);
> + header->main_size, file_data->mode_table);
>
> data_in =
> lto_data_in_create (file_data, (const char *) data + string_offset,
> @@ -5089,7 +5089,7 @@ read_replacements_section (struct lto_fi
> unsigned int count;
>
> lto_input_block ib_main ((const char *) data + main_offset,
> - header->main_size);
> + header->main_size, file_data->mode_table);
>
> data_in = lto_data_in_create (file_data, (const char *) data + string_offset,
> header->string_size, vNULL);
> --- gcc/data-streamer-in.c.jj 2015-02-16 22:18:33.224702233 +0100
> +++ gcc/data-streamer-in.c 2015-02-16 22:19:20.848917709 +0100
> @@ -70,7 +70,7 @@ string_for_index (struct data_in *data_i
> }
>
> /* Get the string stored at location LOC in DATA_IN->STRINGS. */
> - lto_input_block str_tab (data_in->strings, loc - 1, data_in->strings_len);
> + lto_input_block str_tab (data_in->strings, loc - 1, data_in->strings_len, NULL);
> len = streamer_read_uhwi (&str_tab);
> *rlen = len;
>
> --- gcc/tree-streamer-in.c.jj 2015-02-16 22:18:33.220702299 +0100
> +++ gcc/tree-streamer-in.c 2015-02-16 22:19:20.849917692 +0100
> @@ -224,7 +224,7 @@ static void
> unpack_ts_fixed_cst_value_fields (struct bitpack_d *bp, tree expr)
> {
> FIXED_VALUE_TYPE *fp = ggc_alloc<fixed_value> ();
> - fp->mode = bp_unpack_enum (bp, machine_mode, MAX_MACHINE_MODE);
> + fp->mode = bp_unpack_machine_mode (bp);
> fp->data.low = bp_unpack_var_len_int (bp);
> fp->data.high = bp_unpack_var_len_int (bp);
> TREE_FIXED_CST_PTR (expr) = fp;
> @@ -236,7 +236,7 @@ unpack_ts_fixed_cst_value_fields (struct
> static void
> unpack_ts_decl_common_value_fields (struct bitpack_d *bp, tree expr)
> {
> - DECL_MODE (expr) = bp_unpack_enum (bp, machine_mode, MAX_MACHINE_MODE);
> + DECL_MODE (expr) = bp_unpack_machine_mode (bp);
> DECL_NONLOCAL (expr) = (unsigned) bp_unpack_value (bp, 1);
> DECL_VIRTUAL_P (expr) = (unsigned) bp_unpack_value (bp, 1);
> DECL_IGNORED_P (expr) = (unsigned) bp_unpack_value (bp, 1);
> @@ -373,7 +373,7 @@ unpack_ts_type_common_value_fields (stru
> {
> machine_mode mode;
>
> - mode = bp_unpack_enum (bp, machine_mode, MAX_MACHINE_MODE);
> + mode = bp_unpack_machine_mode (bp);
> SET_TYPE_MODE (expr, mode);
> TYPE_STRING_FLAG (expr) = (unsigned) bp_unpack_value (bp, 1);
> TYPE_NO_FORCE_BLK (expr) = (unsigned) bp_unpack_value (bp, 1);
> --- gcc/ipa-inline-analysis.c.jj 2015-02-16 22:18:33.223702249 +0100
> +++ gcc/ipa-inline-analysis.c 2015-02-16 22:19:20.850917676 +0100
> @@ -4190,7 +4190,8 @@ inline_read_section (struct lto_file_dec
> unsigned int i, count2, j;
> unsigned int f_count;
>
> - lto_input_block ib ((const char *) data + main_offset, header->main_size);
> + lto_input_block ib ((const char *) data + main_offset, header->main_size,
> + file_data->mode_table);
>
> data_in =
> lto_data_in_create (file_data, (const char *) data + string_offset,
> --- gcc/ipa-icf.c.jj 2015-02-16 22:18:33.222702266 +0100
> +++ gcc/ipa-icf.c 2015-02-16 22:19:20.851917659 +0100
> @@ -1500,7 +1500,7 @@ sem_item_optimizer::read_section (lto_fi
> unsigned int count;
>
> lto_input_block ib_main ((const char *) data + main_offset, 0,
> - header->main_size);
> + header->main_size, file_data->mode_table);
>
> data_in =
> lto_data_in_create (file_data, (const char *) data + string_offset,
> --- gcc/real.c.jj 2015-02-16 22:18:33.220702299 +0100
> +++ gcc/real.c 2015-02-16 22:19:20.853917626 +0100
> @@ -3031,7 +3031,8 @@ const struct real_format ieee_single_for
> true,
> true,
> true,
> - false
> + false,
> + "ieee_single"
> };
>
> const struct real_format mips_single_format =
> @@ -3052,7 +3053,8 @@ const struct real_format mips_single_for
> true,
> true,
> false,
> - true
> + true,
> + "mips_single"
> };
>
> const struct real_format motorola_single_format =
> @@ -3073,7 +3075,8 @@ const struct real_format motorola_single
> true,
> true,
> true,
> - true
> + true,
> + "motorola_single"
> };
>
> /* SPU Single Precision (Extended-Range Mode) format is the same as IEEE
> @@ -3105,7 +3108,8 @@ const struct real_format spu_single_form
> true,
> true,
> false,
> - false
> + false,
> + "spu_single"
> };
>
> /* IEEE double-precision format. */
> @@ -3314,7 +3318,8 @@ const struct real_format ieee_double_for
> true,
> true,
> true,
> - false
> + false,
> + "ieee_double"
> };
>
> const struct real_format mips_double_format =
> @@ -3335,7 +3340,8 @@ const struct real_format mips_double_for
> true,
> true,
> false,
> - true
> + true,
> + "mips_double"
> };
>
> const struct real_format motorola_double_format =
> @@ -3356,7 +3362,8 @@ const struct real_format motorola_double
> true,
> true,
> true,
> - true
> + true,
> + "motorola_double"
> };
>
> /* IEEE extended real format. This comes in three flavors: Intel's as
> @@ -3700,7 +3707,8 @@ const struct real_format ieee_extended_m
> true,
> true,
> true,
> - true
> + true,
> + "ieee_extended_motorola"
> };
>
> const struct real_format ieee_extended_intel_96_format =
> @@ -3721,7 +3729,8 @@ const struct real_format ieee_extended_i
> true,
> true,
> true,
> - false
> + false,
> + "ieee_extended_intel_96"
> };
>
> const struct real_format ieee_extended_intel_128_format =
> @@ -3742,7 +3751,8 @@ const struct real_format ieee_extended_i
> true,
> true,
> true,
> - false
> + false,
> + "ieee_extended_intel_128"
> };
>
> /* The following caters to i386 systems that set the rounding precision
> @@ -3765,7 +3775,8 @@ const struct real_format ieee_extended_i
> true,
> true,
> true,
> - false
> + false,
> + "ieee_extended_intel_96_round_53"
> };
>
> /* IBM 128-bit extended precision format: a pair of IEEE double precision
> @@ -3853,7 +3864,8 @@ const struct real_format ibm_extended_fo
> true,
> true,
> true,
> - false
> + false,
> + "ibm_extended"
> };
>
> const struct real_format mips_extended_format =
> @@ -3874,7 +3886,8 @@ const struct real_format mips_extended_f
> true,
> true,
> false,
> - true
> + true,
> + "mips_extended"
> };
>
>
> @@ -4137,7 +4150,8 @@ const struct real_format ieee_quad_forma
> true,
> true,
> true,
> - false
> + false,
> + "ieee_quad"
> };
>
> const struct real_format mips_quad_format =
> @@ -4158,7 +4172,8 @@ const struct real_format mips_quad_forma
> true,
> true,
> false,
> - true
> + true,
> + "mips_quad"
> };
>
> /* Descriptions of VAX floating point formats can be found beginning at
> @@ -4458,7 +4473,8 @@ const struct real_format vax_f_format =
> false,
> false,
> false,
> - false
> + false,
> + "vax_f"
> };
>
> const struct real_format vax_d_format =
> @@ -4479,7 +4495,8 @@ const struct real_format vax_d_format =
> false,
> false,
> false,
> - false
> + false,
> + "vax_d"
> };
>
> const struct real_format vax_g_format =
> @@ -4500,7 +4517,8 @@ const struct real_format vax_g_format =
> false,
> false,
> false,
> - false
> + false,
> + "vax_g"
> };
>
> /* Encode real R into a single precision DFP value in BUF. */
> @@ -4576,7 +4594,8 @@ const struct real_format decimal_single_
> true,
> true,
> true,
> - false
> + false,
> + "decimal_single"
> };
>
> /* Double precision decimal floating point (IEEE 754). */
> @@ -4598,7 +4617,8 @@ const struct real_format decimal_double_
> true,
> true,
> true,
> - false
> + false,
> + "decimal_double"
> };
>
> /* Quad precision decimal floating point (IEEE 754). */
> @@ -4620,7 +4640,8 @@ const struct real_format decimal_quad_fo
> true,
> true,
> true,
> - false
> + false,
> + "decimal_quad"
> };
>
> /* Encode half-precision floats. This routine is used both for the IEEE
> @@ -4757,7 +4778,8 @@ const struct real_format ieee_half_forma
> true,
> true,
> true,
> - false
> + false,
> + "ieee_half"
> };
>
> /* ARM's alternative half-precision format, similar to IEEE but with
> @@ -4781,7 +4803,8 @@ const struct real_format arm_half_format
> true,
> true,
> false,
> - false
> + false,
> + "arm_half"
> };
>
> /* A synthetic "format" for internal arithmetic. It's the size of the
> @@ -4826,7 +4849,8 @@ const struct real_format real_internal_f
> false,
> true,
> true,
> - false
> + false,
> + "real_internal"
> };
>
> /* Calculate X raised to the integer exponent N in mode MODE and store
> --- gcc/tree-streamer.c.jj 2015-02-16 22:18:33.221702282 +0100
> +++ gcc/tree-streamer.c 2015-02-16 22:19:20.853917626 +0100
> @@ -53,6 +53,14 @@ along with GCC; see the file COPYING3.
> #include "cgraph.h"
> #include "tree-streamer.h"
>
> +/* Table indexed by machine_mode, used for 2 different purposes.
> + During streaming out we record there non-zero value for all modes
> + that were streamed out.
> + During streaming in, we translate the on the disk mode using this
> + table. For normal LTO it is set to identity, for ACCEL_COMPILER
> + depending on the mode_table content. */
> +unsigned char streamer_mode_table[1 << 8];
> +
> /* Check that all the TS_* structures handled by the streamer_write_* and
> streamer_read_* routines are exactly ALL the structures defined in
> treestruct.def. */
> --- gcc/lto/lto.c.jj 2015-02-16 22:18:33.221702282 +0100
> +++ gcc/lto/lto.c 2015-02-16 22:35:56.213523202 +0100
> @@ -85,6 +85,8 @@ static int lto_parallelism;
>
> static GTY(()) tree first_personality_decl;
>
> +static GTY(()) const unsigned char *lto_mode_identity_table;
> +
> /* Returns a hash code for P. */
>
> static hashval_t
> @@ -1877,7 +1879,7 @@ lto_read_decls (struct lto_file_decl_dat
> uint32_t num_decl_states;
>
> lto_input_block ib_main ((const char *) data + main_offset,
> - header->main_size);
> + header->main_size, decl_data->mode_table);
>
> data_in = lto_data_in_create (decl_data, (const char *) data + string_offset,
> header->string_size, resolutions);
> @@ -2219,6 +2221,11 @@ lto_file_finalize (struct lto_file_decl_
>
> file_data->renaming_hash_table = lto_create_renaming_table ();
> file_data->file_name = file->filename;
> +#ifdef ACCEL_COMPILER
> + lto_input_mode_table (file_data);
> +#else
> + file_data->mode_table = lto_mode_identity_table;
> +#endif
> data = lto_get_section_data (file_data, LTO_section_decls, NULL, &len);
> if (data == NULL)
> {
> @@ -3394,6 +3401,13 @@ lto_init (void)
> memset (&lto_stats, 0, sizeof (lto_stats));
> bitmap_obstack_initialize (NULL);
> gimple_register_cfg_hooks ();
> +#ifndef ACCEL_COMPILER
> + unsigned char *table
> + = ggc_vec_alloc<unsigned char> (MAX_MACHINE_MODE);
> + for (int m = 0; m < MAX_MACHINE_MODE; m++)
> + table[m] = m;
> + lto_mode_identity_table = table;
> +#endif
> }
>
>
> --- gcc/lto-cgraph.c.jj 2015-02-16 22:18:33.211702447 +0100
> +++ gcc/lto-cgraph.c 2015-02-16 22:19:20.855917593 +0100
> @@ -2113,7 +2113,7 @@ input_cgraph_opt_section (struct lto_fil
> unsigned int count;
>
> lto_input_block ib_main ((const char *) data + main_offset,
> - header->main_size);
> + header->main_size, file_data->mode_table);
>
> data_in =
> lto_data_in_create (file_data, (const char *) data + string_offset,
> --- gcc/lto-streamer-in.c.jj 2015-02-16 22:18:33.204702562 +0100
> +++ gcc/lto-streamer-in.c 2015-02-16 22:26:53.355464202 +0100
> @@ -1116,10 +1116,12 @@ lto_read_body_or_constructor (struct lto
>
> /* Set up the struct function. */
> from = data_in->reader_cache->nodes.length ();
> - lto_input_block ib_main (data + main_offset, header->main_size);
> + lto_input_block ib_main (data + main_offset, header->main_size,
> + file_data->mode_table);
> if (TREE_CODE (node->decl) == FUNCTION_DECL)
> {
> - lto_input_block ib_cfg (data + cfg_offset, header->cfg_size);
> + lto_input_block ib_cfg (data + cfg_offset, header->cfg_size,
> + file_data->mode_table);
> input_function (fn_decl, data_in, &ib_main, &ib_cfg);
> }
> else
> @@ -1384,7 +1386,8 @@ lto_input_toplevel_asms (struct lto_file
>
> string_offset = sizeof (*header) + header->main_size;
>
> - lto_input_block ib (data + sizeof (*header), header->main_size);
> + lto_input_block ib (data + sizeof (*header), header->main_size,
> + file_data->mode_table);
>
> data_in = lto_data_in_create (file_data, data + string_offset,
> header->string_size, vNULL);
> @@ -1403,6 +1406,123 @@ lto_input_toplevel_asms (struct lto_file
> }
>
>
> +/* Input mode table. */
> +
> +void
> +lto_input_mode_table (struct lto_file_decl_data *file_data)
> +{
> + size_t len;
> + const char *data = lto_get_section_data (file_data, LTO_section_mode_table,
> + NULL, &len);
> + if (! data)
> + {
> + internal_error ("cannot read LTO mode table from %s",
> + file_data->file_name);
> + return;
> + }
> +
> + unsigned char *table = ggc_cleared_vec_alloc<unsigned char> (1 << 8);
> + file_data->mode_table = table;
> + const struct lto_simple_header_with_strings *header
> + = (const struct lto_simple_header_with_strings *) data;
> + int string_offset;
> + struct data_in *data_in;
> + string_offset = sizeof (*header) + header->main_size;
> +
> + lto_input_block ib (data + sizeof (*header), header->main_size, NULL);
> + data_in = lto_data_in_create (file_data, data + string_offset,
> + header->string_size, vNULL);
> + bitpack_d bp = streamer_read_bitpack (&ib);
> +
> + table[VOIDmode] = VOIDmode;
> + table[BLKmode] = BLKmode;
> + unsigned int m;
> + while ((m = bp_unpack_value (&bp, 8)) != VOIDmode)
> + {
> + enum mode_class mclass
> + = bp_unpack_enum (&bp, mode_class, MAX_MODE_CLASS);
> + unsigned int size = bp_unpack_value (&bp, 8);
> + unsigned int prec = bp_unpack_value (&bp, 16);
> + machine_mode inner = (machine_mode) table[bp_unpack_value (&bp, 8)];
> + unsigned int nunits = bp_unpack_value (&bp, 8);
> + unsigned int ibit = 0, fbit = 0;
> + unsigned int real_fmt_len = 0;
> + const char *real_fmt_name = NULL;
> + switch (mclass)
> + {
> + case MODE_FRACT:
> + case MODE_UFRACT:
> + case MODE_ACCUM:
> + case MODE_UACCUM:
> + ibit = bp_unpack_value (&bp, 8);
> + fbit = bp_unpack_value (&bp, 8);
> + break;
> + case MODE_FLOAT:
> + case MODE_DECIMAL_FLOAT:
> + real_fmt_name = bp_unpack_indexed_string (data_in, &bp,
> + &real_fmt_len);
> + break;
> + default:
> + break;
> + }
> + /* First search just the GET_CLASS_NARROWEST_MODE to wider modes,
> + if not found, fallback to all modes. */
> + int pass;
> + for (pass = 0; pass < 2; pass++)
> + for (machine_mode mr = pass ? VOIDmode
> + : GET_CLASS_NARROWEST_MODE (mclass);
> + pass ? mr < MAX_MACHINE_MODE : mr != VOIDmode;
> + pass ? mr = (machine_mode) (m + 1)
> + : mr = GET_MODE_WIDER_MODE (mr))
> + if (GET_MODE_CLASS (mr) != mclass
> + || GET_MODE_SIZE (mr) != size
> + || GET_MODE_PRECISION (mr) != prec
> + || GET_MODE_INNER (mr) != inner
> + || GET_MODE_IBIT (mr) != ibit
> + || GET_MODE_FBIT (mr) != fbit
> + || GET_MODE_NUNITS (mr) != nunits)
> + continue;
> + else if ((mclass == MODE_FLOAT || mclass == MODE_DECIMAL_FLOAT)
> + && strcmp (REAL_MODE_FORMAT (mr)->name, real_fmt_name) != 0)
> + continue;
> + else
> + {
> + table[m] = mr;
> + pass = 2;
> + break;
> + }
> + unsigned int mname_len;
> + const char *mname = bp_unpack_indexed_string (data_in, &bp, &mname_len);
> + if (pass == 2)
> + {
> + switch (mclass)
> + {
> + case MODE_VECTOR_INT:
> + case MODE_VECTOR_FLOAT:
> + case MODE_VECTOR_FRACT:
> + case MODE_VECTOR_UFRACT:
> + case MODE_VECTOR_ACCUM:
> + case MODE_VECTOR_UACCUM:
> + /* For unsupported vector modes just use BLKmode,
> + if the scalar mode is supported. */
> + if (inner != VOIDmode)
> + {
> + table[m] = BLKmode;
> + break;
> + }
> + /* FALLTHRU */
> + default:
> + error ("unsupported mode %s\n", mname);
> + break;
> + }
> + }
> + }
> + lto_data_in_delete (data_in);
> +
> + lto_free_section_data (file_data, LTO_section_mode_table, NULL, data, len);
> +}
> +
> +
> /* Initialization for the LTO reader. */
>
> void
>
>
> Jakub