[PATCH] pahole guided packing

Richard Biener rguenther@suse.de
Mon Mar 13 15:23:00 GMT 2017


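I skimmed over a `pahole -I -H 1` report and came up with the following
patch, which reorders (and in a few cases narrows) structure members so
that they pack better.  It is far from complete, but it should catch the
cases that looked important.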

Bootstrap and regression testing are in progress on x86_64-unknown-linux-gnu.

Richard.

2017-03-13  Richard Biener  <rguenther@suse.de>

	* alias.c (struct alias_set_entry): Pack properly.
	* cfgloop.h (struct loop): Likewise.  Turn the bool flags and
	estimate_state into bitfields.
	* cse.c (struct set): Likewise.
	* ipa-utils.c (struct searchc_env): Likewise.
	* loop-invariant.c (struct invariant): Likewise.
	* lra-remat.c (struct cand): Likewise.
	* recog.c (struct change_t): Likewise.
	* rtl.h (struct address_info): Likewise.
	* symbol-summary.h (function_summary): Likewise.
	* tree-loop-distribution.c (struct partition): Likewise.
	* tree-object-size.c (struct object_size_info): Likewise.  Narrow
	pass from int to unsigned char.
	* tree-ssa-loop-ivopts.c (struct cost_pair): Likewise.
	* tree-ssa-threadupdate.c (struct ssa_local_info_t): Likewise.
	* tree-vect-data-refs.c (struct _vect_peel_info): Likewise.
	* tree-vect-slp.c (struct _slp_oprnd_info): Likewise.
	* tree-vect-stmts.c (struct simd_call_arg_info): Likewise.
	* tree-vectorizer.h (struct _loop_vec_info): Likewise.
	(struct _stmt_vec_info): Likewise.

Index: gcc/alias.c
===================================================================
--- gcc/alias.c	(revision 246082)
+++ gcc/alias.c	(working copy)
@@ -126,15 +126,6 @@ struct GTY(()) alias_set_entry {
   /* The alias set number, as stored in MEM_ALIAS_SET.  */
   alias_set_type alias_set;
 
-  /* The children of the alias set.  These are not just the immediate
-     children, but, in fact, all descendants.  So, if we have:
-
-       struct T { struct S s; float f; }
-
-     continuing our example above, the children here will be all of
-     `int', `double', `float', and `struct S'.  */
-  hash_map<alias_set_hash, int> *children;
-
   /* Nonzero if would have a child of zero: this effectively makes this
      alias set the same as alias set zero.  */
   bool has_zero_child;
@@ -145,6 +136,15 @@ struct GTY(()) alias_set_entry {
   bool is_pointer;
   /* Nonzero if is_pointer or if one of childs have has_pointer set.  */
   bool has_pointer;
+
+  /* The children of the alias set.  These are not just the immediate
+     children, but, in fact, all descendants.  So, if we have:
+
+       struct T { struct S s; float f; }
+
+     continuing our example above, the children here will be all of
+     `int', `double', `float', and `struct S'.  */
+  hash_map<alias_set_hash, int> *children;
 };
 
 static int rtx_equal_for_memref_p (const_rtx, const_rtx);
Index: gcc/cfgloop.h
===================================================================
--- gcc/cfgloop.h	(revision 246082)
+++ gcc/cfgloop.h	(working copy)
@@ -167,21 +167,6 @@ struct GTY ((chain_next ("%h.next"))) lo
      nb_iterations.  */
   widest_int nb_iterations_estimate;
 
-  bool any_upper_bound;
-  bool any_estimate;
-  bool any_likely_upper_bound;
-
-  /* True if the loop can be parallel.  */
-  bool can_be_parallel;
-
-  /* True if -Waggressive-loop-optimizations warned about this loop
-     already.  */
-  bool warned_aggressive_loop_optimizations;
-
-  /* An integer estimation of the number of iterations.  Estimate_state
-     describes what is the state of the estimation.  */
-  enum loop_estimation estimate_state;
-
   /* If > 0, an integer, where the user asserted that for any
      I in [ 0, nb_iterations ) and for any J in
      [ I, min ( I + safelen, nb_iterations ) ), the Ith and Jth iterations
@@ -211,14 +196,29 @@ struct GTY ((chain_next ("%h.next"))) lo
      that might result in hard to track down bugs in niter/scev consumers.  */
   unsigned constraints;
 
+  /* An integer estimation of the number of iterations.  Estimate_state
+     describes what is the state of the estimation.  */
+  ENUM_BITFIELD(loop_estimation) estimate_state : 8;
+
+  unsigned any_upper_bound : 1;
+  unsigned any_estimate : 1;
+  unsigned any_likely_upper_bound : 1;
+
+  /* True if the loop can be parallel.  */
+  unsigned can_be_parallel : 1;
+
+  /* True if -Waggressive-loop-optimizations warned about this loop
+     already.  */
+  unsigned warned_aggressive_loop_optimizations : 1;
+
   /* True if this loop should never be vectorized.  */
-  bool dont_vectorize;
+  unsigned dont_vectorize : 1;
 
   /* True if we should try harder to vectorize this loop.  */
-  bool force_vectorize;
+  unsigned force_vectorize : 1;
 
   /* True if the loop is part of an oacc kernels region.  */
-  bool in_oacc_kernels_region;
+  unsigned in_oacc_kernels_region : 1;
 
   /* For SIMD loops, this is a unique identifier of the loop, referenced
      by IFN_GOMP_SIMD_VF, IFN_GOMP_SIMD_LANE and IFN_GOMP_SIMD_LAST_LANE
Index: gcc/cse.c
===================================================================
--- gcc/cse.c	(revision 246082)
+++ gcc/cse.c	(working copy)
@@ -4154,10 +4154,10 @@ struct set
      The size of this field should match the size of the mode
      field of struct rtx_def (see rtl.h).  */
   ENUM_BITFIELD(machine_mode) mode : 8;
-  /* A constant equivalent for SET_SRC, if any.  */
-  rtx src_const;
   /* Hash value of constant equivalent for SET_SRC.  */
   unsigned src_const_hash;
+  /* A constant equivalent for SET_SRC, if any.  */
+  rtx src_const;
   /* Table entry for constant equivalent for SET_SRC, if any.  */
   struct table_elt *src_const_elt;
   /* Table entry for the destination address.  */
Index: gcc/ipa-utils.c
===================================================================
--- gcc/ipa-utils.c	(revision 246082)
+++ gcc/ipa-utils.c	(working copy)
@@ -58,8 +58,8 @@ ipa_print_order (FILE* out,
 
 struct searchc_env {
   struct cgraph_node **stack;
-  int stack_size;
   struct cgraph_node **result;
+  int stack_size;
   int order_pos;
   splay_tree nodes_marked_new;
   bool reduce;
Index: gcc/loop-invariant.c
===================================================================
--- gcc/loop-invariant.c	(revision 246082)
+++ gcc/loop-invariant.c	(working copy)
@@ -108,14 +108,14 @@ struct invariant
   /* The number of invariants which eqto this.  */
   unsigned eqno;
 
-  /* If we moved the invariant out of the loop, the register that contains its
-     value.  */
-  rtx reg;
-
   /* If we moved the invariant out of the loop, the original regno
      that contained its value.  */
   int orig_regno;
 
+  /* If we moved the invariant out of the loop, the register that contains its
+     value.  */
+  rtx reg;
+
   /* The definition of the invariant.  */
   struct def *def;
 
@@ -134,12 +134,12 @@ struct invariant
   /* Cost of the invariant.  */
   unsigned cost;
 
-  /* The invariants it depends on.  */
-  bitmap depends_on;
-
   /* Used for detecting already visited invariants during determining
      costs of movements.  */
   unsigned stamp;
+
+  /* The invariants it depends on.  */
+  bitmap depends_on;
 };
 
 /* Currently processed loop.  */
Index: gcc/lra-remat.c
===================================================================
--- gcc/lra-remat.c	(revision 246082)
+++ gcc/lra-remat.c	(working copy)
@@ -94,10 +94,10 @@ struct cand
 {
   /* Index of the candidates in all_cands. */
   int index;
-  /* The candidate insn.  */
-  rtx_insn *insn;
   /* Insn pseudo regno for rematerialization.  */
   int regno;
+  /* The candidate insn.  */
+  rtx_insn *insn;
   /* Non-negative if a reload pseudo is in the insn instead of the
      pseudo for rematerialization.  */
   int reload_regno;
Index: gcc/recog.c
===================================================================
--- gcc/recog.c	(revision 246082)
+++ gcc/recog.c	(working copy)
@@ -182,9 +182,9 @@ struct change_t
 {
   rtx object;
   int old_code;
+  bool unshare;
   rtx *loc;
   rtx old;
-  bool unshare;
 };
 
 static change_t *changes;
Index: gcc/rtl.h
===================================================================
--- gcc/rtl.h	(revision 246082)
+++ gcc/rtl.h	(working copy)
@@ -2021,6 +2021,9 @@ struct address_info {
   /* The address space.  */
   addr_space_t as;
 
+  /* True if this is an RTX_AUTOINC address.  */
+  bool autoinc_p;
+
   /* A pointer to the top-level address.  */
   rtx *outer;
 
@@ -2076,9 +2079,6 @@ struct address_info {
 
   /* If BASE is nonnull, this is the code of the rtx that contains it.  */
   enum rtx_code base_outer_code;
-
-  /* True if this is an RTX_AUTOINC address.  */
-  bool autoinc_p;
 };
 
 /* This is used to bundle an rtx and a mode together so that the pair
Index: gcc/symbol-summary.h
===================================================================
--- gcc/symbol-summary.h	(revision 246082)
+++ gcc/symbol-summary.h	(working copy)
@@ -205,6 +205,10 @@ private:
     return *v;
   }
 
+  /* Indicates if insertion hook is enabled.  */
+  bool m_insertion_enabled;
+  /* Indicates if the summary is released.  */
+  bool m_released;
   /* Main summary store, where summary ID is used as key.  */
   hash_map <map_hash, T *> m_map;
   /* Internal summary insertion hook pointer.  */
@@ -213,10 +217,6 @@ private:
   cgraph_node_hook_list *m_symtab_removal_hook;
   /* Internal summary duplication hook pointer.  */
   cgraph_2node_hook_list *m_symtab_duplication_hook;
-  /* Indicates if insertion hook is enabled.  */
-  bool m_insertion_enabled;
-  /* Indicates if the summary is released.  */
-  bool m_released;
   /* Symbol table the summary is registered to.  */
   symbol_table *m_symtab;
 
Index: gcc/tree-loop-distribution.c
===================================================================
--- gcc/tree-loop-distribution.c	(revision 246082)
+++ gcc/tree-loop-distribution.c	(working copy)
@@ -474,12 +474,12 @@ struct partition
   bitmap stmts;
   bitmap loops;
   bool reduction_p;
+  bool plus_one;
   enum partition_kind kind;
   /* data-references a kind != PKIND_NORMAL partition is about.  */
   data_reference_p main_dr;
   data_reference_p secondary_dr;
   tree niter;
-  bool plus_one;
 };
 
 
Index: gcc/tree-object-size.c
===================================================================
--- gcc/tree-object-size.c	(revision 246082)
+++ gcc/tree-object-size.c	(working copy)
@@ -36,9 +36,9 @@ along with GCC; see the file COPYING3.
 struct object_size_info
 {
   int object_size_type;
-  bitmap visited, reexamine;
-  int pass;
+  unsigned char pass;
   bool changed;
+  bitmap visited, reexamine;
   unsigned int *depths;
   unsigned int *stack, *tos;
 };
Index: gcc/tree-ssa-loop-ivopts.c
===================================================================
--- gcc/tree-ssa-loop-ivopts.c	(revision 246082)
+++ gcc/tree-ssa-loop-ivopts.c	(working copy)
@@ -346,12 +346,12 @@ struct cost_pair
 {
   struct iv_cand *cand;	/* The candidate.  */
   comp_cost cost;	/* The cost.  */
+  enum tree_code comp;	/* For iv elimination, the comparison.  */
   bitmap depends_on;	/* The list of invariants that have to be
 			   preserved.  */
   tree value;		/* For final value elimination, the expression for
 			   the final value of the iv.  For iv elimination,
 			   the new bound to compare with.  */
-  enum tree_code comp;	/* For iv elimination, the comparison.  */
   iv_inv_expr_ent *inv_expr; /* Loop invariant expression.  */
 };
 
Index: gcc/tree-ssa-threadupdate.c
===================================================================
--- gcc/tree-ssa-threadupdate.c	(revision 246082)
+++ gcc/tree-ssa-threadupdate.c	(working copy)
@@ -235,12 +235,12 @@ struct ssa_local_info_t
      and sharing a template for that block is considerably more difficult.  */
   basic_block template_block;
 
-  /* TRUE if we thread one or more jumps, FALSE otherwise.  */
-  bool jumps_threaded;
-
   /* Blocks duplicated for the thread.  */
   bitmap duplicate_blocks;
 
+  /* TRUE if we thread one or more jumps, FALSE otherwise.  */
+  bool jumps_threaded;
+
   /* When we have multiple paths through a joiner which reach different
      final destinations, then we may need to correct for potential
      profile insanities.  */
Index: gcc/tree-vect-data-refs.c
===================================================================
--- gcc/tree-vect-data-refs.c	(revision 246082)
+++ gcc/tree-vect-data-refs.c	(working copy)
@@ -1137,8 +1137,8 @@ vect_get_data_access_cost (struct data_r
 
 typedef struct _vect_peel_info
 {
-  int npeel;
   struct data_reference *dr;
+  int npeel;
   unsigned int count;
 } *vect_peel_info;
 
Index: gcc/tree-vect-slp.c
===================================================================
--- gcc/tree-vect-slp.c	(revision 246082)
+++ gcc/tree-vect-slp.c	(working copy)
@@ -129,8 +129,8 @@ typedef struct _slp_oprnd_info
   /* Information about the first statement, its vector def-type, type, the
      operand itself in case it's constant, and an indication if it's a pattern
      stmt.  */
-  enum vect_def_type first_dt;
   tree first_op_type;
+  enum vect_def_type first_dt;
   bool first_pattern;
   bool second_pattern;
 } *slp_oprnd_info;
Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c	(revision 246082)
+++ gcc/tree-vect-stmts.c	(working copy)
@@ -3074,8 +3074,8 @@ struct simd_call_arg_info
 {
   tree vectype;
   tree op;
-  enum vect_def_type dt;
   HOST_WIDE_INT linear_step;
+  enum vect_def_type dt;
   unsigned int align;
   bool simd_lane_linear;
 };
Index: gcc/tree-vectorizer.h
===================================================================
--- gcc/tree-vectorizer.h	(revision 246082)
+++ gcc/tree-vectorizer.h	(working copy)
@@ -240,9 +240,6 @@ typedef struct _loop_vec_info : public v
      PARAM_MIN_VECT_LOOP_BOUND.  */
   unsigned int th;
 
-  /* Is the loop vectorizable? */
-  bool vectorizable;
-
   /* Unrolling factor  */
   int vectorization_factor;
 
@@ -277,10 +274,6 @@ typedef struct _loop_vec_info : public v
      runtime (loop versioning) misalignment check.  */
   vec<gimple *> may_misalign_stmts;
 
-  /* The unrolling factor needed to SLP the loop. In case of that pure SLP is
-     applied to the loop, i.e., no unrolling is needed, this is 1.  */
-  unsigned slp_unrolling_factor;
-
   /* Reduction cycles detected in the loop. Used in loop-aware SLP.  */
   vec<gimple *> reductions;
 
@@ -291,9 +284,16 @@ typedef struct _loop_vec_info : public v
   /* Cost vector for a single scalar iteration.  */
   vec<stmt_info_for_cost> scalar_cost_vec;
 
+  /* The unrolling factor needed to SLP the loop. In case of that pure SLP is
+     applied to the loop, i.e., no unrolling is needed, this is 1.  */
+  unsigned slp_unrolling_factor;
+
   /* Cost of a single scalar iteration.  */
   int single_scalar_iteration_cost;
 
+  /* Is the loop vectorizable? */
+  bool vectorizable;
+
   /* When we have grouped data accesses with gaps, we may introduce invalid
      memory accesses.  We peel the last iteration of the loop to prevent
      this.  */
@@ -328,13 +328,13 @@ typedef struct _loop_vec_info : public v
      vectorize this, so this field would be false.  */
   bool no_data_dependencies;
 
+  /* Mark loops having masked stores.  */
+  bool has_mask_store;
+
   /* If if-conversion versioned this loop before conversion, this is the
      loop version without if-conversion.  */
   struct loop *scalar_loop;
 
-  /* Mark loops having masked stores.  */
-  bool has_mask_store;
-
   /* For loops being epilogues of already vectorized loops
      this points to the original vectorized loop.  Otherwise NULL.  */
   _loop_vec_info *orig_loop_info;
@@ -555,6 +555,10 @@ typedef struct _stmt_vec_info {
   /* Stmt is part of some pattern (computation idiom)  */
   bool in_pattern_p;
 
+  /* Is this statement vectorizable or should it be skipped in (partial)
+     vectorization.  */
+  bool vectorizable;
+
   /* The stmt to which this info struct refers to.  */
   gimple *stmt;
 
@@ -648,23 +652,19 @@ typedef struct _stmt_vec_info {
      indicates whether the stmt needs to be vectorized.  */
   enum vect_relevant relevant;
 
-  /* Is this statement vectorizable or should it be skipped in (partial)
-     vectorization.  */
-  bool vectorizable;
-
   /* For loads if this is a gather, for stores if this is a scatter.  */
   bool gather_scatter_p;
 
   /* True if this is an access with loop-invariant stride.  */
   bool strided_p;
 
+  /* For both loads and stores.  */
+  bool simd_lane_access_p;
+
   /* Classifies how the load or store is going to be implemented
      for loop vectorization.  */
   vect_memory_access_type memory_access_type;
 
-  /* For both loads and stores.  */
-  bool simd_lane_access_p;
-
   /* For reduction loops, this is the type of reduction.  */
   enum vect_reduction_type v_reduc_type;
 



More information about the Gcc-patches mailing list