This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: PATCH: Replace UNITS_PER_SIMD_WORD with GET_MODE_SIZE (TYPE_MODE (vectype))


On Tue, May 20, 2008 at 11:50:36AM +0200, Richard Guenther wrote:
> On Tue, 20 May 2008, Paolo Bonzini wrote:
> 
> > > > My first patch doesn't change
> > > > UNITS_PER_SIMD_WORD at all. It just removes
> > > > the hardcoded UNITS_PER_SIMD_WORD usage.
> > > > The only one left is in get_vectype_for_scalar_type.
> > > 
> > > I like this approach the most.  As for get_vectype_for_scalar_type, this
> > > probably should be a target hook (effectively taking the place of
> > > UNITS_PER_SIMD_WORD).
> > 
> > Maybe.  However, the target hook would be implemented, depending on the
> > architecture, either like this:
> > 
> >   nbytes = GET_MODE_SIZE (TYPE_MODE (scalar_type));
> >   nunits = UNITS_PER_SIMD_WORD / nbytes;
> >   vectype = build_vector_type (scalar_type, nunits);
> >   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
> >       && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
> >     return NULL_TREE;
> > 
> > or like this:
> > 
> >   switch (TYPE_MODE (scalar_type))
> >     {
> > >     case QImode: return build_vector_type_for_mode (scalar_type, V16QImode);
> >     case HImode: return build_vector_type_for_mode (scalar_type, V8HImode);
> >     case SImode: return build_vector_type_for_mode (scalar_type, V4SImode);
> >     ...
> >     }
> > 
> > in all the targets.  All the dumping stuff would stay in tree-vectorizer.c,
> > and even if you added a new build_vector_type_for_size function, it would add
> > more code duplication than value.
> > 
> > I think that H.J.'s patch to only use UNITS_PER_SIMD_WORD in
> > get_vectype_for_scalar_type, plus his patch to add an argument to the target
> > macro, is the simplest thing to do and it works.
> 
> Sure, that also works for me.
> 

Here is the combined patch. Tested on Linux/ia32, Linux/ia64 and
Linux/Intel64.  OK for trunk?

Thanks.

H.J.
-----
2008-05-20  H.J. Lu  <hongjiu.lu@intel.com>

	* defaults.h (UNITS_PER_SIMD_WORD): Add type as argument.
	* doc/tm.texi (UNITS_PER_SIMD_WORD): Likewise.

	* tree-vect-analyze.c (vect_compute_data_ref_alignment): Replace
	UNITS_PER_SIMD_WORD with GET_MODE_SIZE (TYPE_MODE (vectype)).
	(vect_update_misalignment_for_peel): Likewise.
	(vector_alignment_reachable_p): Likewise.
	* tree-vect-transform.c (vectorizable_load): Likewise.
	* tree-vectorizer.c (vect_supportable_dr_alignment): Likewise.

	* tree-vectorizer.c (get_vectype_for_scalar_type): Pass
	scalar_type to UNITS_PER_SIMD_WORD.

	* config/arm/arm.h (UNITS_PER_SIMD_WORD): Updated.
	* config/i386/i386.h (UNITS_PER_SIMD_WORD): Likewise.
	* config/mips/mips.h (UNITS_PER_SIMD_WORD): Likewise.
	* config/rs6000/rs6000.h (UNITS_PER_SIMD_WORD): Likewise.
	* config/sparc/sparc.h (UNITS_PER_SIMD_WORD): Likewise.

Index: doc/tm.texi
===================================================================
--- doc/tm.texi	(revision 2646)
+++ doc/tm.texi	(working copy)
@@ -990,11 +990,11 @@ Minimum number of units in a word.  If t
 smallest value that @code{UNITS_PER_WORD} can have at run-time.
 @end defmac
 
-@defmac UNITS_PER_SIMD_WORD
-Number of units in the vectors that the vectorizer can produce.
-The default is equal to @code{UNITS_PER_WORD}, because the vectorizer
-can do some transformations even in absence of specialized @acronym{SIMD}
-hardware.
+@defmac UNITS_PER_SIMD_WORD (@var{type})
+Number of units in the vectors that the vectorizer can produce for
+scalar type @var{type}.  The default is equal to @code{UNITS_PER_WORD},
+because the vectorizer can do some transformations even in absence of
+specialized @acronym{SIMD} hardware.
 @end defmac
 
 @defmac POINTER_SIZE
Index: defaults.h
===================================================================
--- defaults.h	(revision 2646)
+++ defaults.h	(working copy)
@@ -739,7 +739,7 @@ along with GCC; see the file COPYING3.  
 /* By default, only attempt to parallelize bitwise operations, and
    possibly adds/subtracts using bit-twiddling.  */
 #ifndef UNITS_PER_SIMD_WORD
-#define UNITS_PER_SIMD_WORD UNITS_PER_WORD
+#define UNITS_PER_SIMD_WORD(TYPE) UNITS_PER_WORD
 #endif
 
 /* Determine whether __cxa_atexit, rather than atexit, is used to
Index: ChangeLog.work
===================================================================
Index: tree-vectorizer.c
===================================================================
--- tree-vectorizer.c	(revision 2646)
+++ tree-vectorizer.c	(working copy)
@@ -1806,12 +1806,12 @@ get_vectype_for_scalar_type (tree scalar
   int nunits;
   tree vectype;
 
-  if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD)
+  if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD (scalar_type))
     return NULL_TREE;
 
-  /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
+  /* FORNOW: Only a single vector size per type (UNITS_PER_SIMD_WORD)
      is expected.  */
-  nunits = UNITS_PER_SIMD_WORD / nbytes;
+  nunits = UNITS_PER_SIMD_WORD (scalar_type) / nbytes;
 
   vectype = build_vector_type (scalar_type, nunits);
   if (vect_print_dump_info (REPORT_DETAILS))
@@ -1937,11 +1937,13 @@ vect_supportable_dr_alignment (struct da
 	  && (!targetm.vectorize.builtin_mask_for_load
 	      || targetm.vectorize.builtin_mask_for_load ()))
 	{
-	    if (nested_in_vect_loop
-		&& TREE_INT_CST_LOW (DR_STEP (dr)) != UNITS_PER_SIMD_WORD)
-	      return dr_explicit_realign;
-	    else
-	      return dr_explicit_realign_optimized;
+	  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+	  if (nested_in_vect_loop
+	      && (TREE_INT_CST_LOW (DR_STEP (dr))
+		  != GET_MODE_SIZE (TYPE_MODE (vectype))))
+	    return dr_explicit_realign;
+	  else
+	    return dr_explicit_realign_optimized;
 	}
 
       if (optab_handler (movmisalign_optab, mode)->insn_code != 
Index: tree-vect-analyze.c
===================================================================
--- tree-vect-analyze.c	(revision 2646)
+++ tree-vect-analyze.c	(working copy)
@@ -1370,6 +1370,7 @@ vect_compute_data_ref_alignment (struct 
   misalign = DR_INIT (dr);
   aligned_to = DR_ALIGNED_TO (dr);
   base_addr = DR_BASE_ADDRESS (dr);
+  vectype = STMT_VINFO_VECTYPE (stmt_info);
 
   /* In case the dataref is in an inner-loop of the loop that is being
      vectorized (LOOP), we use the base and misalignment information
@@ -1382,7 +1383,7 @@ vect_compute_data_ref_alignment (struct 
       tree step = DR_STEP (dr);
       HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
     
-      if (dr_step % UNITS_PER_SIMD_WORD == 0)
+      if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
         {
           if (vect_print_dump_info (REPORT_ALIGNMENT))
             fprintf (vect_dump, "inner step divides the vector-size.");
@@ -1399,7 +1400,6 @@ vect_compute_data_ref_alignment (struct 
     }
 
   base = build_fold_indirect_ref (base_addr);
-  vectype = STMT_VINFO_VECTYPE (stmt_info);
   alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
 
   if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
@@ -1541,8 +1541,9 @@ vect_update_misalignment_for_peel (struc
       && known_alignment_for_access_p (dr_peel))
     {
       int misal = DR_MISALIGNMENT (dr);
+      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
       misal += npeel * dr_size;
-      misal %= UNITS_PER_SIMD_WORD;
+      misal %= GET_MODE_SIZE (TYPE_MODE (vectype));
       SET_DR_MISALIGNMENT (dr, misal);
       return;
     }
@@ -1622,7 +1623,7 @@ vector_alignment_reachable_p (struct dat
       if (!known_alignment_for_access_p (dr))
 	return false;
 
-      elem_size = UNITS_PER_SIMD_WORD / nelements;
+      elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
       mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
 
       if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
Index: tree-vect-transform.c
===================================================================
--- tree-vect-transform.c	(revision 2646)
+++ tree-vect-transform.c	(working copy)
@@ -5778,7 +5778,8 @@ vectorizable_load (tree stmt, block_stmt
      nested within an outer-loop that is being vectorized.  */
 
   if (nested_in_vect_loop_p (loop, stmt)
-      && (TREE_INT_CST_LOW (DR_STEP (dr)) % UNITS_PER_SIMD_WORD != 0))
+      && (TREE_INT_CST_LOW (DR_STEP (dr))
+	  % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
     {
       gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
       compute_in_loop = true;
Index: config/sparc/sparc.h
===================================================================
--- config/sparc/sparc.h	(revision 2646)
+++ config/sparc/sparc.h	(working copy)
@@ -606,7 +606,7 @@ extern struct sparc_cpu_select sparc_sel
 #define MIN_UNITS_PER_WORD	4
 #endif
 
-#define UNITS_PER_SIMD_WORD	(TARGET_VIS ? 8 : UNITS_PER_WORD)
+#define UNITS_PER_SIMD_WORD(TYPE) (TARGET_VIS ? 8 : UNITS_PER_WORD)
 
 /* Now define the sizes of the C data types.  */
 
Index: config/i386/i386.h
===================================================================
--- config/i386/i386.h	(revision 2646)
+++ config/i386/i386.h	(working copy)
@@ -1132,7 +1132,7 @@ do {									\
 
 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
    place emms and femms instructions.  */
-#define UNITS_PER_SIMD_WORD (TARGET_SSE ? 16 : UNITS_PER_WORD)
+#define UNITS_PER_SIMD_WORD(TYPE) (TARGET_SSE ? 16 : UNITS_PER_WORD)
 
 #define VALID_DFP_MODE_P(MODE) \
   ((MODE) == SDmode || (MODE) == DDmode || (MODE) == TDmode)
Index: config/rs6000/rs6000.h
===================================================================
--- config/rs6000/rs6000.h	(revision 2646)
+++ config/rs6000/rs6000.h	(working copy)
@@ -897,7 +897,7 @@ extern enum rs6000_nop_insertion rs6000_
 #define PAIRED_VECTOR_MODE(MODE)        \
          ((MODE) == V2SFmode)            
 
-#define UNITS_PER_SIMD_WORD					     \
+#define UNITS_PER_SIMD_WORD(TYPE)				     \
 	(TARGET_ALTIVEC ? UNITS_PER_ALTIVEC_WORD		     \
 	 : (TARGET_SPE ? UNITS_PER_SPE_WORD : (TARGET_PAIRED_FLOAT ? \
 	 UNITS_PER_PAIRED_WORD : UNITS_PER_WORD)))
Index: config/arm/arm.h
===================================================================
--- config/arm/arm.h	(revision 2646)
+++ config/arm/arm.h	(working copy)
@@ -506,7 +506,7 @@ extern int arm_arch_hwdiv;
 /* Use the option -mvectorize-with-neon-quad to override the use of doubleword
    registers when autovectorizing for Neon, at least until multiple vector
    widths are supported properly by the middle-end.  */
-#define UNITS_PER_SIMD_WORD \
+#define UNITS_PER_SIMD_WORD(TYPE) \
   (TARGET_NEON ? (TARGET_NEON_VECTORIZE_QUAD ? 16 : 8) : UNITS_PER_WORD)
 
 /* True if natural alignment is used for doubleword types.  */
Index: config/mips/mips.h
===================================================================
--- config/mips/mips.h	(revision 2646)
+++ config/mips/mips.h	(working copy)
@@ -1193,7 +1193,8 @@ enum mips_code_readable_setting {
 /* The number of bytes in a double.  */
 #define UNITS_PER_DOUBLE (TYPE_PRECISION (double_type_node) / BITS_PER_UNIT)
 
-#define UNITS_PER_SIMD_WORD (TARGET_PAIRED_SINGLE_FLOAT ? 8 : UNITS_PER_WORD)
+#define UNITS_PER_SIMD_WORD(TYPE) \
+  (TARGET_PAIRED_SINGLE_FLOAT ? 8 : UNITS_PER_WORD)
 
 /* Set the sizes of the core types.  */
 #define SHORT_TYPE_SIZE 16


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]