This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[4.7][google]Support for getting CPU type and feature information at run-time. (issue4893046)
- From: tmsriram at google dot com (Sriraman Tallam)
- To: reply at codereview dot appspotmail dot com, gcc-patches at gcc dot gnu dot org
- Date: Tue, 16 Aug 2011 13:50:46 -0700 (PDT)
- Subject: [4.7][google]Support for getting CPU type and feature information at run-time. (issue4893046)
Support for getting CPU type and feature information at run-time.
The following patch provides support for finding the platform type at run-time, like cpu type and features supported. The multi-versioning framework will use the builtins added to dispatch the right function version. Please refer to http://gcc.gnu.org/ml/gcc/2011-08/msg00298.html for details on function multi-versioning usability.
* tree-pass.h (pass_tree_fold_builtin_target): New pass.
* builtins.def (BUILT_IN_TARGET_SUPPORTS_CMOV): New builtin.
(BUILT_IN_TARGET_SUPPORTS_MMX): New builtin.
(BUILT_IN_TARGET_SUPPORTS_POPCOUNT): New builtin.
(BUILT_IN_TARGET_SUPPORTS_SSE): New builtin.
(BUILT_IN_TARGET_SUPPORTS_SSE2): New builtin.
(BUILT_IN_TARGET_SUPPORTS_SSE3): New builtin.
(BUILT_IN_TARGET_SUPPORTS_SSSE3): New builtin.
(BUILT_IN_TARGET_SUPPORTS_SSE4_1): New builtin.
(BUILT_IN_TARGET_SUPPORTS_SSE4_2): New builtin.
(BUILT_IN_TARGET_IS_AMD): New builtin.
(BUILT_IN_TARGET_IS_INTEL): New builtin.
(BUILT_IN_TARGET_IS_COREI7_NEHALEM): New builtin.
(BUILT_IN_TARGET_IS_COREI7_WESTMERE): New builtin.
(BUILT_IN_TARGET_IS_COREI7_SANDYBRIDGE): New builtin.
(BUILT_IN_TARGET_IS_AMDFAM10_BARCELONA): New builtin.
(BUILT_IN_TARGET_IS_AMDFAM10_SHANGHAI): New builtin.
(BUILT_IN_TARGET_IS_AMDFAM10_ISTANBUL): New builtin.
* mversn-dispatch.c (do_fold_builtin_target): New function.
(gate_fold_builtin_target): New function.
(pass_tree_fold_builtin_target): New pass.
* timevar.def (TV_FOLD_BUILTIN_TARGET): New var.
* passes.c (init_optimization_passes): Add new pass to pass list.
* config/i386/i386.c (build_struct_with_one_bit_fields): New function.
(make_var_decl): New function.
(get_field_from_struct): New function.
(make_constructor_to_get_target_type): New function.
(fold_builtin_target): New function.
(ix86_fold_builtin): New function.
(TARGET_FOLD_BUILTIN): New macro.
* gcc.dg/builtin_target.c: New test.
* config/i386/i386-cpuinfo.c: New file.
* config/i386/t-cpuinfo: New file.
* config.host: Add t-cpuinfo to link i386-cpuinfo.o with libgcc.
Index: libgcc/config.host
===================================================================
--- libgcc/config.host (revision 177767)
+++ libgcc/config.host (working copy)
@@ -609,7 +609,7 @@ case ${host} in
i[34567]86-*-linux* | x86_64-*-linux* | \
i[34567]86-*-kfreebsd*-gnu | i[34567]86-*-knetbsd*-gnu | \
i[34567]86-*-gnu*)
- tmake_file="${tmake_file} t-tls"
+ tmake_file="${tmake_file} t-tls i386/t-cpuinfo"
if test "$libgcc_cv_cfi" = "yes"; then
tmake_file="${tmake_file} t-stack i386/t-stack-i386"
fi
Index: libgcc/config/i386/t-cpuinfo
===================================================================
--- libgcc/config/i386/t-cpuinfo (revision 0)
+++ libgcc/config/i386/t-cpuinfo (revision 0)
@@ -0,0 +1,2 @@
+# Add the run-time CPU detection code (i386-cpuinfo.c) to libgcc.
+LIB2ADD += $(srcdir)/config/i386/i386-cpuinfo.c
Index: libgcc/config/i386/i386-cpuinfo.c
===================================================================
--- libgcc/config/i386/i386-cpuinfo.c (revision 0)
+++ libgcc/config/i386/i386-cpuinfo.c (revision 0)
@@ -0,0 +1,275 @@
+/* Copyright (C) 2011 Free Software Foundation, Inc.
+ * Contributed by Sriraman Tallam <tmsriram@google.com>.
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ *
+ * This code is adapted from gcc/config/i386/driver-i386.c. The CPUID
+ * instruction is used to figure out the cpu type and supported features.
+ * GCC runs __cpu_indicator_init from a constructor which sets the members
+ * of __cpu_model and __cpu_features.
+ */
+
+#include <string.h>
+
+#ifdef __GNUC__
+#include "cpuid.h"
+
+/* CPU types that get_intel_cpu and get_amd_cpu record in __cpu_type. */
+enum processor_type
+{
+ PROCESSOR_PENTIUM = 0,
+ PROCESSOR_CORE2,
+ PROCESSOR_COREI7_NEHALEM,
+ PROCESSOR_COREI7_WESTMERE,
+ PROCESSOR_COREI7_SANDYBRIDGE,
+ PROCESSOR_INTEL_GENERIC,
+ PROCESSOR_AMDFAM10_BARCELONA,
+ PROCESSOR_AMDFAM10_SHANGHAI,
+ PROCESSOR_AMDFAM10_ISTANBUL,
+ PROCESSOR_AMDFAM10_GENERIC,
+ PROCESSOR_AMD_GENERIC,
+ PROCESSOR_GENERIC,
+ PROCESSOR_max
+};
+
+/* Vendor signatures, compared in __cpu_indicator_init against the EBX
+ value returned by CPUID level 0. The trailing comments spell each
+ value's bytes as ASCII, lowest byte first. */
+enum vendor_signatures
+{
+ SIG_INTEL = 0x756e6547 /* Genu */,
+ SIG_AMD = 0x68747541 /* Auth */
+};
+
+
+/* Features supported. One bit per feature, filled in by
+ get_available_features. The order of the bit-fields must stay in step
+ with enum processor_features in fold_builtin_target
+ (gcc/config/i386/i386.c), which picks fields by position. */
+
+struct __processor_features
+{
+ unsigned int __cpu_cmov : 1;
+ unsigned int __cpu_mmx : 1;
+ unsigned int __cpu_popcnt : 1;
+ unsigned int __cpu_sse : 1;
+ unsigned int __cpu_sse2 : 1;
+ unsigned int __cpu_sse3 : 1;
+ unsigned int __cpu_ssse3 : 1;
+ unsigned int __cpu_sse4_1 : 1;
+ unsigned int __cpu_sse4_2 : 1;
+};
+
+/* Flags exported. One bit per vendor or CPU model, set by
+ __cpu_indicator_init, get_amd_cpu and get_intel_cpu. The order of the
+ bit-fields must stay in step with enum processor_model in
+ fold_builtin_target (gcc/config/i386/i386.c), which picks fields by
+ position. */
+
+struct __processor_model
+{
+ unsigned int __cpu_is_amd : 1;
+ unsigned int __cpu_is_intel : 1;
+ unsigned int __cpu_is_corei7_nehalem : 1;
+ unsigned int __cpu_is_corei7_westmere : 1;
+ unsigned int __cpu_is_corei7_sandybridge : 1;
+ unsigned int __cpu_is_amdfam10_barcelona : 1;
+ unsigned int __cpu_is_amdfam10_shanghai : 1;
+ unsigned int __cpu_is_amdfam10_istanbul : 1;
+};
+
+/* Run-time CPU description. __cpu_type starts out as PROCESSOR_GENERIC;
+ __cpu_features and __cpu_model are cleared and then filled in by
+ __cpu_indicator_init. */
+enum processor_type __cpu_type = PROCESSOR_GENERIC;
+struct __processor_features __cpu_features;
+struct __processor_model __cpu_model;
+
+/* Record the AMD processor identified by FAMILY and MODEL in __cpu_type
+   and set the matching __cpu_model flag when there is one.  */
+
+static void
+get_amd_cpu (unsigned int family, unsigned int model)
+{
+  /* Only family 0x10 parts are told apart; anything else is generic.  */
+  if (family != 0x10)
+    {
+      __cpu_type = PROCESSOR_AMD_GENERIC;
+      return;
+    }
+
+  if (model == 0x2)
+    {
+      __cpu_type = PROCESSOR_AMDFAM10_BARCELONA;
+      __cpu_model.__cpu_is_amdfam10_barcelona = 1;
+    }
+  else if (model == 0x4)
+    {
+      __cpu_type = PROCESSOR_AMDFAM10_SHANGHAI;
+      __cpu_model.__cpu_is_amdfam10_shanghai = 1;
+    }
+  else if (model == 0x8)
+    {
+      __cpu_type = PROCESSOR_AMDFAM10_ISTANBUL;
+      __cpu_model.__cpu_is_amdfam10_istanbul = 1;
+    }
+  else
+    __cpu_type = PROCESSOR_AMDFAM10_GENERIC;
+}
+
+/* Record the Intel processor identified by FAMILY and MODEL in __cpu_type
+   and set the matching __cpu_model flag when there is one.  Nothing is
+   changed unless BRAND_ID is 0.  */
+
+static void
+get_intel_cpu (unsigned int family, unsigned int model, unsigned int brand_id)
+{
+  /* Parse family and model only if brand ID is 0.  */
+  if (brand_id != 0)
+    return;
+
+  if (family == 0x5)
+    {
+      __cpu_type = PROCESSOR_PENTIUM;
+      return;
+    }
+
+  if (family != 0x6)
+    {
+      /* We have no idea.  */
+      __cpu_type = PROCESSOR_INTEL_GENERIC;
+      return;
+    }
+
+  /* Family 6: the model number selects the microarchitecture.  */
+  switch (model)
+    {
+    case 0x1a:
+    case 0x1e:
+    case 0x1f:
+    case 0x2e:
+      /* Nehalem.  */
+      __cpu_type = PROCESSOR_COREI7_NEHALEM;
+      __cpu_model.__cpu_is_corei7_nehalem = 1;
+      break;
+    case 0x25:
+    case 0x2c:
+    case 0x2f:
+      /* Westmere.  */
+      __cpu_type = PROCESSOR_COREI7_WESTMERE;
+      __cpu_model.__cpu_is_corei7_westmere = 1;
+      break;
+    case 0x2a:
+      /* Sandy Bridge.  */
+      __cpu_type = PROCESSOR_COREI7_SANDYBRIDGE;
+      __cpu_model.__cpu_is_corei7_sandybridge = 1;
+      break;
+    case 0x17:
+    case 0x1d:
+      /* Penryn.  */
+    case 0x0f:
+      /* Merom.  */
+      __cpu_type = PROCESSOR_CORE2;
+      break;
+    default:
+      __cpu_type = PROCESSOR_INTEL_GENERIC;
+      break;
+    }
+}
+
+/* Set each __cpu_features flag to 1 or 0 according to the corresponding
+   cpuid.h feature bit in ECX or EDX, the values the caller obtained from
+   CPUID level 1.  */
+
+static void
+get_available_features (unsigned int ecx, unsigned int edx)
+{
+  /* Features reported in EDX.  */
+  __cpu_features.__cpu_cmov = (edx & bit_CMOV) != 0;
+  __cpu_features.__cpu_mmx = (edx & bit_MMX) != 0;
+  __cpu_features.__cpu_sse = (edx & bit_SSE) != 0;
+  __cpu_features.__cpu_sse2 = (edx & bit_SSE2) != 0;
+
+  /* Features reported in ECX.  */
+  __cpu_features.__cpu_popcnt = (ecx & bit_POPCNT) != 0;
+  __cpu_features.__cpu_sse3 = (ecx & bit_SSE3) != 0;
+  __cpu_features.__cpu_ssse3 = (ecx & bit_SSSE3) != 0;
+  __cpu_features.__cpu_sse4_1 = (ecx & bit_SSE4_1) != 0;
+  __cpu_features.__cpu_sse4_2 = (ecx & bit_SSE4_2) != 0;
+}
+
+/* A noinline function calling __get_cpuid. Having many calls to
+ cpuid in one function in 32-bit mode causes GCC to complain:
+ "can't find a register in class 'CLOBBERED_REGS'". This is
+ related to PR rtl-optimization 44174.
+ Passes __LEVEL and the four output pointers to __get_cpuid unchanged
+ and returns its result; __cpu_indicator_init treats 0 as failure. */
+
+static int __attribute__ ((noinline))
+__get_cpuid_output (unsigned int __level,
+ unsigned int *__eax, unsigned int *__ebx,
+ unsigned int *__ecx, unsigned int *__edx)
+{
+ return __get_cpuid (__level, __eax, __ebx, __ecx, __edx);
+}
+
+/* This function will be linked in to binaries that need to look up
+   CPU information.  It clears __cpu_features and __cpu_model, then uses
+   CPUID levels 0 and 1 to set the vendor and model flags (and __cpu_type,
+   through get_amd_cpu / get_intel_cpu) and the feature flags.  If either
+   CPUID query fails, or level 1 is not available, it returns early and
+   all flags stay 0.  */
+
+void
+__cpu_indicator_init(void)
+{
+  unsigned int eax, ebx, ecx, edx;
+
+  int max_level;
+  unsigned int vendor;
+  unsigned int model, family, brand_id;
+  unsigned int extended_model, extended_family;
+
+  memset (&__cpu_features, 0, sizeof (struct __processor_features));
+  memset (&__cpu_model, 0, sizeof (struct __processor_model));
+
+  /* Assume cpuid insn present. Run in level 0 to get vendor id. */
+  if (!__get_cpuid_output (0, &eax, &ebx, &ecx, &edx))
+    return;
+
+  vendor = ebx;
+  max_level = eax;
+
+  if (max_level < 1)
+    return;
+
+  if (!__get_cpuid_output (1, &eax, &ebx, &ecx, &edx))
+    return;
+
+  model = (eax >> 4) & 0x0f;
+  family = (eax >> 8) & 0x0f;
+  brand_id = ebx & 0xff;
+
+  /* The extended model is kept pre-shifted so that it can simply be
+     added above the 4-bit base model.  */
+  extended_model = (eax >> 12) & 0xf0;
+  extended_family = (eax >> 20) & 0xff;
+
+  /* Adjust model and family.  A base family of 0x0f means the real family
+     is base + extended family.  This has to be done for AMD as well as
+     Intel: AMD family 0x10, which get_amd_cpu looks for, is reported as
+     base family 0x0f plus extended family 0x01 and would otherwise never
+     be recognized.  */
+  if (family == 0x0f && (vendor == SIG_INTEL || vendor == SIG_AMD))
+    {
+      family += extended_family;
+      model += extended_model;
+    }
+  else if (family == 0x06 && vendor == SIG_INTEL)
+    model += extended_model;
+
+  /* Find CPU model. */
+
+  if (vendor == SIG_AMD)
+    {
+      __cpu_model.__cpu_is_amd = 1;
+      get_amd_cpu (family, model);
+    }
+  else if (vendor == SIG_INTEL)
+    {
+      __cpu_model.__cpu_is_intel = 1;
+      get_intel_cpu (family, model, brand_id);
+    }
+
+  /* Find available features.  This is done for every vendor, using the
+     ECX/EDX values from CPUID level 1.  */
+  get_available_features (ecx, edx);
+}
+
+#else
+
+/* Fallback used when __GNUC__ is not defined: the CPUID-based code above
+ is compiled out, so initialization does nothing. */
+void
+__cpu_indicator_init(void)
+{
+}
+
+#endif /* __GNUC__ */
Index: gcc/tree-pass.h
===================================================================
--- gcc/tree-pass.h (revision 177767)
+++ gcc/tree-pass.h (working copy)
@@ -449,6 +449,7 @@ extern struct gimple_opt_pass pass_split_functions
extern struct gimple_opt_pass pass_feedback_split_functions;
extern struct gimple_opt_pass pass_threadsafe_analyze;
extern struct gimple_opt_pass pass_tree_convert_builtin_dispatch;
+extern struct gimple_opt_pass pass_tree_fold_builtin_target;
/* IPA Passes */
extern struct simple_ipa_opt_pass pass_ipa_lower_emutls;
Index: gcc/testsuite/gcc.dg/builtin_target.c
===================================================================
--- gcc/testsuite/gcc.dg/builtin_target.c (revision 0)
+++ gcc/testsuite/gcc.dg/builtin_target.c (revision 0)
@@ -0,0 +1,49 @@
+/* This test checks if the __builtin_target_* calls are recognized. */
+
+/* { dg-do run } */
+
+/* Calls every __builtin_target_* builtin once.  Returns -1 as soon as one
+   of them evaluates to a negative value, 0 otherwise.  */
+
+int
+fn1 ()
+{
+  if (__builtin_target_supports_cmov () < 0
+      || __builtin_target_supports_mmx () < 0
+      || __builtin_target_supports_popcount () < 0
+      || __builtin_target_supports_sse () < 0
+      || __builtin_target_supports_sse2 () < 0
+      || __builtin_target_supports_sse3 () < 0
+      || __builtin_target_supports_ssse3 () < 0
+      || __builtin_target_supports_sse4_1 () < 0
+      || __builtin_target_supports_sse4_2 () < 0
+      || __builtin_target_is_amd () < 0
+      || __builtin_target_is_intel () < 0
+      || __builtin_target_is_corei7_nehalem () < 0
+      || __builtin_target_is_corei7_westmere () < 0
+      || __builtin_target_is_corei7_sandybridge () < 0
+      || __builtin_target_is_amdfam10_barcelona () < 0
+      || __builtin_target_is_amdfam10_shanghai () < 0
+      || __builtin_target_is_amdfam10_istanbul () < 0)
+    return -1;
+
+  return 0;
+}
+
+/* The exit status is fn1's result: 0 if no builtin returned a negative
+ value, -1 otherwise. */
+int main ()
+{
+ return fn1 ();
+}
Index: gcc/builtins.def
===================================================================
--- gcc/builtins.def (revision 177767)
+++ gcc/builtins.def (working copy)
@@ -763,6 +763,25 @@ DEF_BUILTIN (BUILT_IN_EMUTLS_REGISTER_COMMON,
/* Multiversioning builtin dispatch hook. */
DEF_GCC_BUILTIN (BUILT_IN_DISPATCH, "dispatch", BT_FN_INT_PTR_FN_INT_PTR_PTR_VAR, ATTR_NULL)
+/* Builtins to determine target type and features at run-time. */
+DEF_GCC_BUILTIN (BUILT_IN_TARGET_SUPPORTS_CMOV, "target_supports_cmov", BT_FN_INT, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_TARGET_SUPPORTS_MMX, "target_supports_mmx", BT_FN_INT, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_TARGET_SUPPORTS_POPCOUNT, "target_supports_popcount", BT_FN_INT, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_TARGET_SUPPORTS_SSE, "target_supports_sse", BT_FN_INT, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_TARGET_SUPPORTS_SSE2, "target_supports_sse2", BT_FN_INT, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_TARGET_SUPPORTS_SSE3, "target_supports_sse3", BT_FN_INT, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_TARGET_SUPPORTS_SSSE3, "target_supports_ssse3", BT_FN_INT, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_TARGET_SUPPORTS_SSE4_1, "target_supports_sse4_1", BT_FN_INT, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_TARGET_SUPPORTS_SSE4_2, "target_supports_sse4_2", BT_FN_INT, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_TARGET_IS_AMD, "target_is_amd", BT_FN_INT, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_TARGET_IS_INTEL, "target_is_intel", BT_FN_INT, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_TARGET_IS_COREI7_NEHALEM, "target_is_corei7_nehalem", BT_FN_INT, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_TARGET_IS_COREI7_WESTMERE, "target_is_corei7_westmere", BT_FN_INT, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_TARGET_IS_COREI7_SANDYBRIDGE, "target_is_corei7_sandybridge", BT_FN_INT, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_TARGET_IS_AMDFAM10_BARCELONA, "target_is_amdfam10_barcelona", BT_FN_INT, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_TARGET_IS_AMDFAM10_SHANGHAI, "target_is_amdfam10_shanghai", BT_FN_INT, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_TARGET_IS_AMDFAM10_ISTANBUL, "target_is_amdfam10_istanbul", BT_FN_INT, ATTR_NULL)
+
/* Exception support. */
DEF_BUILTIN_STUB (BUILT_IN_UNWIND_RESUME, "__builtin_unwind_resume")
DEF_BUILTIN_STUB (BUILT_IN_CXA_END_CLEANUP, "__builtin_cxa_end_cleanup")
Index: gcc/mversn-dispatch.c
===================================================================
--- gcc/mversn-dispatch.c (revision 177767)
+++ gcc/mversn-dispatch.c (working copy)
@@ -135,6 +135,7 @@ along with GCC; see the file COPYING3. If not see
#include "output.h"
#include "vecprim.h"
#include "gimple-pretty-print.h"
+#include "target.h"
typedef struct cgraph_node* NODEPTR;
DEF_VEC_P (NODEPTR);
@@ -1764,3 +1765,103 @@ struct gimple_opt_pass pass_tree_convert_builtin_d
TODO_update_ssa | TODO_verify_ssa
}
};
+
+/* Fold calls to __builtin_target_*. Walks every statement of the current
+ function and replaces each such call whose result is used with the
+ value returned by the target's fold_builtin hook, or with zero when the
+ hook returns NULL_TREE. Always returns 0. */
+
+static unsigned int
+do_fold_builtin_target (void)
+{
+ basic_block bb;
+ gimple_stmt_iterator gsi;
+
+ /* Go through each stmt looking for __builtin_target_* calls */
+ FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (current_function_decl))
+ {
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple stmt = gsi_stmt (gsi);
+ gimple assign_stmt;
+ tree call_decl;
+ tree lhs_retval;
+ tree folded_val;
+
+ tree ssa_var, tmp_var;
+ gimple init_stmt;
+
+ if (!is_gimple_call (stmt))
+ continue;
+
+ call_decl = gimple_call_fndecl (stmt);
+
+ /* Check if it is a __builtin_target_* call: a named, normal-class
+ builtin whose name contains "__builtin_target". */
+
+ if (call_decl == NULL
+ || DECL_NAME (call_decl) == NULL_TREE
+ || DECL_BUILT_IN_CLASS (call_decl) != BUILT_IN_NORMAL
+ || strstr (IDENTIFIER_POINTER (DECL_NAME (call_decl)),
+ "__builtin_target") == NULL)
+ continue;
+
+ /* If the lhs is NULL there is no need to fold the call. */
+ lhs_retval = gimple_call_lhs(stmt);
+ if (lhs_retval == NULL)
+ continue;
+
+ /* Call the target hook to fold the builtin */
+ folded_val = targetm.fold_builtin(call_decl, 0, NULL, false);
+
+ /* If the target does not support the builtin then fold it to zero. */
+ if (folded_val == NULL_TREE)
+ folded_val = build_zero_cst (unsigned_type_node);
+
+ /* Type cast unsigned value to integer: first load the folded value
+ into a fresh unsigned SSA temporary (init_stmt), then convert that
+ temporary into the call's original lhs (assign_stmt). */
+ tmp_var = create_tmp_var (unsigned_type_node, NULL);
+ init_stmt = gimple_build_assign (tmp_var, folded_val);
+ ssa_var = make_ssa_name (tmp_var, init_stmt);
+ gimple_assign_set_lhs (init_stmt, ssa_var);
+ mark_symbols_for_renaming (init_stmt);
+
+ assign_stmt = gimple_build_assign_with_ops (NOP_EXPR, lhs_retval, ssa_var, 0);
+ mark_symbols_for_renaming(assign_stmt);
+
+ /* Both statements are inserted directly after the call, assign_stmt
+ first, so init_stmt ends up between the call and assign_stmt. */
+ gsi_insert_after_without_update (&gsi, assign_stmt, GSI_SAME_STMT);
+ gsi_insert_after_without_update (&gsi, init_stmt, GSI_SAME_STMT);
+ /* Delete the original call. */
+ gsi_remove(&gsi, true);
+ }
+ }
+
+ return 0;
+}
+
+/* Gate for pass_tree_fold_builtin_target: the pass is always run. */
+static bool
+gate_fold_builtin_target (void)
+{
+ return true;
+}
+
+/* Pass to fold __builtin_target_* functions. A GIMPLE pass that is gated
+ by gate_fold_builtin_target and executed by do_fold_builtin_target; it
+ requires and provides PROP_cfg and asks for a function dump, CFG cleanup
+ and an SSA update plus verification when it finishes. */
+
+struct gimple_opt_pass pass_tree_fold_builtin_target =
+{
+ {
+ GIMPLE_PASS,
+ "fold_builtin_target", /* name */
+ gate_fold_builtin_target, /* gate */
+ do_fold_builtin_target, /* execute */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ TV_FOLD_BUILTIN_TARGET, /* tv_id */
+ PROP_cfg, /* properties_required */
+ PROP_cfg, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_dump_func | /* todo_flags_finish */
+ TODO_cleanup_cfg |
+ TODO_update_ssa |
+ TODO_verify_ssa
+ }
+};
+
+
Index: gcc/timevar.def
===================================================================
--- gcc/timevar.def (revision 177767)
+++ gcc/timevar.def (working copy)
@@ -124,6 +124,7 @@ DEFTIMEVAR (TV_PARSE_INMETH , "parser inl
DEFTIMEVAR (TV_TEMPLATE_INST , "template instantiation")
DEFTIMEVAR (TV_INLINE_HEURISTICS , "inline heuristics")
DEFTIMEVAR (TV_MVERSN_DISPATCH , "multiversion dispatch")
+DEFTIMEVAR (TV_FOLD_BUILTIN_TARGET , "fold __builtin_target calls")
DEFTIMEVAR (TV_INTEGRATION , "integration")
DEFTIMEVAR (TV_TREE_GIMPLIFY , "tree gimplify")
DEFTIMEVAR (TV_TREE_EH , "tree eh")
Index: gcc/passes.c
===================================================================
--- gcc/passes.c (revision 177767)
+++ gcc/passes.c (working copy)
@@ -1249,6 +1249,8 @@ init_optimization_passes (void)
{
struct opt_pass **p = &pass_ipa_multiversion_dispatch.pass.sub;
NEXT_PASS (pass_tree_convert_builtin_dispatch);
+ /* Fold calls to __builtin_target_*. */
+ NEXT_PASS (pass_tree_fold_builtin_target);
/* Rebuilding cgraph edges is necessary as the above passes change
the call graph. Otherwise, future optimizations use the old
call graph and make wrong decisions sometimes.*/
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c (revision 177767)
+++ gcc/config/i386/i386.c (working copy)
@@ -58,6 +58,8 @@ along with GCC; see the file COPYING3. If not see
#include "sched-int.h"
#include "sbitmap.h"
#include "fibheap.h"
+#include "tree-flow.h"
+#include "tree-pass.h"
enum upper_128bits_state
{
@@ -7867,6 +7869,338 @@ ix86_build_builtin_va_list (void)
return ret;
}
+/* Returns a struct type with name NAME and number of fields equal to
+   NUM_FIELDS.  Each field is an unsigned int bit field of length 1 bit.
+   Fields are named "0_field", "1_field", ... in creation order.  Only the
+   first character of the name varies, so NUM_FIELDS must be between 0
+   and 10; this is asserted.  */
+
+static tree
+build_struct_with_one_bit_fields (int num_fields, const char *name)
+{
+  int i;
+  char field_name [10];
+  tree field = NULL_TREE, field_chain = NULL_TREE;
+  tree type = make_node (RECORD_TYPE);
+
+  /* Past '9' the one-character prefix would stop being a digit and the
+     generated names would no longer follow the documented scheme.  */
+  gcc_assert (num_fields >= 0 && num_fields <= 10);
+
+  strcpy (field_name, "k_field");
+
+  for (i = 0; i < num_fields; i++)
+    {
+      /* Name the fields, 0_field, 1_field, ... */
+      field_name [0] = '0' + i;
+      field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
+                          get_identifier (field_name), unsigned_type_node);
+      DECL_BIT_FIELD (field) = 1;
+      DECL_SIZE (field) = bitsize_one_node;
+      /* Link the new field in at the head of the chain.
+         NOTE(review): the chain is therefore built newest-first; this
+         relies on finish_builtin_struct restoring creation order, which
+         get_field_from_struct's callers depend on -- confirm.  */
+      if (field_chain != NULL_TREE)
+        DECL_CHAIN (field) = field_chain;
+      field_chain = field;
+    }
+  finish_builtin_struct (type, name, field_chain, NULL_TREE);
+  return type;
+}
+
+/* Returns a VAR_DECL of type TYPE and name NAME. The declaration is
+ marked public, external and static, made one-only, passed to
+ assemble_variable, and its varpool node is marked finalized. */
+
+static tree
+make_var_decl (tree type, const char *name)
+{
+ tree new_decl;
+ struct varpool_node *vnode;
+
+ new_decl = build_decl (UNKNOWN_LOCATION,
+ VAR_DECL,
+ get_identifier(name),
+ type);
+
+ /* NOTE(review): both DECL_EXTERNAL and TREE_STATIC are set here, and the
+ decl is then made one-only and assembled -- confirm this combination
+ is what is intended for a variable defined in libgcc. */
+ DECL_EXTERNAL (new_decl) = 1;
+ TREE_STATIC (new_decl) = 1;
+ TREE_PUBLIC (new_decl) = 1;
+ DECL_INITIAL (new_decl) = 0;
+ DECL_ARTIFICIAL (new_decl) = 0;
+ DECL_PRESERVE_P (new_decl) = 1;
+
+ make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
+ assemble_variable (new_decl, 0, 0, 0);
+
+ vnode = varpool_node (new_decl);
+ gcc_assert (vnode != NULL);
+ /* Set finalized to 1, otherwise it asserts in function "write_symbol" in
+ lto-streamer-out.c. */
+ vnode->finalized = 1;
+
+ return new_decl;
+}
+
+/* Traverses the chain of fields in STRUCT_TYPE and returns the FIELD_NUM
+   numbered field, counting from 0.  Asserts that such a field exists, so
+   the result is never NULL_TREE.  */
+
+static tree
+get_field_from_struct (tree struct_type, int field_num)
+{
+  int i;
+  tree field = TYPE_FIELDS (struct_type);
+
+  /* Stop as soon as the chain runs out so that DECL_CHAIN is never
+     applied to NULL_TREE.  */
+  for (i = 0; i < field_num && field != NULL_TREE; i++)
+    field = DECL_CHAIN (field);
+
+  /* Check the field being returned, not just the ones stepped over:
+     FIELD_NUM equal to the number of fields must not yield NULL_TREE.  */
+  gcc_assert (field != NULL_TREE);
+
+  return field;
+}
+
+/* Create a new static constructor that calls __cpu_indicator_init ()
+ function defined in libgcc/config/i386/i386-cpuinfo.c which runs cpuid
+ to figure out the type of the target.
+ NAME is used as both the identifier and the assembler name of the
+ constructor. Returns the constructor's FUNCTION_DECL. */
+
+static tree
+make_constructor_to_get_target_type (const char *name)
+{
+ tree decl, type, t;
+ gimple_seq seq;
+ basic_block new_bb;
+ tree old_current_function_decl;
+
+ tree __cpu_indicator_int_decl;
+ gimple constructor_body;
+
+
+ /* void (void): the type of both __cpu_indicator_init and the
+ constructor. */
+ type = build_function_type_list (void_type_node, NULL_TREE);
+
+ /* Make a call stmt to __cpu_indicator_init */
+ __cpu_indicator_int_decl = build_fn_decl ("__cpu_indicator_init", type);
+ constructor_body = gimple_build_call (__cpu_indicator_int_decl, 0);
+ DECL_EXTERNAL (__cpu_indicator_int_decl) = 1;
+
+ /* Declare the constructor itself. */
+ decl = build_fn_decl (name, type);
+
+ DECL_NAME (decl) = get_identifier (name);
+ SET_DECL_ASSEMBLER_NAME (decl, DECL_NAME (decl));
+ gcc_assert (cgraph_node (decl) != NULL);
+
+ TREE_USED (decl) = 1;
+ DECL_ARTIFICIAL (decl) = 1;
+ DECL_IGNORED_P (decl) = 0;
+ TREE_PUBLIC (decl) = 0;
+ DECL_UNINLINABLE (decl) = 1;
+ DECL_EXTERNAL (decl) = 0;
+ DECL_CONTEXT (decl) = NULL_TREE;
+ DECL_INITIAL (decl) = make_node (BLOCK);
+ DECL_STATIC_CONSTRUCTOR (decl) = 1;
+ TREE_READONLY (decl) = 0;
+ DECL_PURE_P (decl) = 0;
+
+ /* This is a comdat. */
+ make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
+
+ /* Build result decl and add to function_decl. */
+ t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, void_type_node);
+ DECL_ARTIFICIAL (t) = 1;
+ DECL_IGNORED_P (t) = 1;
+ DECL_RESULT (decl) = t;
+
+ gimplify_function_tree (decl);
+
+ /* Build CFG for this function. The caller's current_function_decl is
+ saved here and restored before returning. */
+
+ old_current_function_decl = current_function_decl;
+ push_cfun (DECL_STRUCT_FUNCTION (decl));
+ current_function_decl = decl;
+ init_empty_tree_cfg_for_function (DECL_STRUCT_FUNCTION (decl));
+ cfun->curr_properties |=
+ (PROP_gimple_lcf | PROP_gimple_leh | PROP_cfg | PROP_referenced_vars |
+ PROP_ssa);
+ new_bb = create_empty_bb (ENTRY_BLOCK_PTR);
+ make_edge (ENTRY_BLOCK_PTR, new_bb, EDGE_FALLTHRU);
+
+ /* XXX: Not sure if the edge commented below is necessary. If I add this
+ edge, it fails in gimple_verify_flow_info in tree-cfg.c in condition :
+ " if (e->flags & EDGE_FALLTHRU)"
+ during -fprofile-generate.
+ Otherwise, it is fine. Deleting this edge does not break anything.
+ Commenting this so that it is clear I am intentionally not doing this.*/
+ /* make_edge (new_bb, EXIT_BLOCK_PTR, EDGE_FALLTHRU); */
+
+ /* The single basic block holds just the call to __cpu_indicator_init. */
+ seq = gimple_seq_alloc_with_stmt (constructor_body);
+
+ set_bb_seq (new_bb, seq);
+ gimple_set_bb (constructor_body, new_bb);
+
+ /* Set the lexical block of the constructor body. Fails the inliner
+ other wise. */
+ gimple_set_block (constructor_body, DECL_INITIAL (decl));
+
+ /* This call is very important if this pass runs when the IR is in
+ SSA form. It breaks things in strange ways otherwise. */
+ init_tree_ssa (DECL_STRUCT_FUNCTION (decl));
+ /* add_referenced_var (version_selector_var); */
+
+ /* Hand the new function to the call graph and mark it as needed. */
+ cgraph_add_new_function (decl, true);
+ cgraph_call_function_insertion_hooks (cgraph_node (decl));
+ cgraph_mark_needed_node (cgraph_node (decl));
+
+ pop_cfun ();
+ current_function_decl = old_current_function_decl;
+ return decl;
+}
+
+/* FNDECL is a __builtin_target_* call that is folded into a reference to
+   the matching one-bit field of __cpu_features or __cpu_model, both
+   defined in libgcc/config/i386/i386-cpuinfo.c.  Returns the COMPONENT_REF
+   for that field, or NULL_TREE if FNDECL is not one of the builtins
+   handled here.  The first call also creates the static constructor that
+   runs __cpu_indicator_init, the two struct types and the two variable
+   declarations; later calls reuse them.  */
+
+static tree
+fold_builtin_target (tree fndecl)
+{
+  /* This is the order of bit-fields in __processor_features in
+     i386-cpuinfo.c */
+  enum processor_features
+  {
+    F_CMOV = 0,
+    F_MMX,
+    F_POPCNT,
+    F_SSE,
+    F_SSE2,
+    F_SSE3,
+    F_SSSE3,
+    F_SSE4_1,
+    F_SSE4_2,
+    F_MAX
+  };
+
+  /* This is the order of bit-fields in __processor_model in
+     i386-cpuinfo.c */
+  enum processor_model
+  {
+    M_AMD = 0,
+    M_INTEL,
+    M_COREI7_NEHALEM,
+    M_COREI7_WESTMERE,
+    M_COREI7_SANDYBRIDGE,
+    M_AMDFAM10_BARCELONA,
+    M_AMDFAM10_SHANGHAI,
+    M_AMDFAM10_ISTANBUL,
+    M_MAX
+  };
+
+  /* Created on first use and cached for later calls.
+     NOTE(review): these function-local caches carry no GTY marker;
+     confirm the trees they hold cannot be garbage collected.  */
+  static tree __processor_features_type = NULL_TREE;
+  static tree __cpu_features_var = NULL_TREE;
+  static tree __processor_model_type = NULL_TREE;
+  static tree __cpu_model_var = NULL_TREE;
+  static tree ctor_decl = NULL_TREE;
+  static tree field;
+  static tree which_struct;
+
+  /* Make a call to __cpu_indicator_init in a constructor.
+     Function __cpu_indicator_init is defined in i386-cpuinfo.c.  */
+  if (ctor_decl == NULL_TREE)
+    ctor_decl = make_constructor_to_get_target_type
+                 ("__cpu_indicator_init_ctor");
+
+  if (__processor_features_type == NULL_TREE)
+    __processor_features_type
+      = build_struct_with_one_bit_fields (F_MAX, "__processor_features");
+
+  if (__processor_model_type == NULL_TREE)
+    __processor_model_type
+      = build_struct_with_one_bit_fields (M_MAX, "__processor_model");
+
+  if (__cpu_features_var == NULL_TREE)
+    __cpu_features_var = make_var_decl (__processor_features_type,
+                                        "__cpu_features");
+
+  if (__cpu_model_var == NULL_TREE)
+    __cpu_model_var = make_var_decl (__processor_model_type,
+                                     "__cpu_model");
+
+  /* Look at fndecl code to identify the field requested. */
+  switch (DECL_FUNCTION_CODE (fndecl))
+    {
+    case BUILT_IN_TARGET_SUPPORTS_CMOV:
+      field = get_field_from_struct (__processor_features_type, F_CMOV);
+      which_struct = __cpu_features_var;
+      break;
+    case BUILT_IN_TARGET_SUPPORTS_MMX:
+      field = get_field_from_struct (__processor_features_type, F_MMX);
+      which_struct = __cpu_features_var;
+      break;
+    case BUILT_IN_TARGET_SUPPORTS_POPCOUNT:
+      field = get_field_from_struct (__processor_features_type, F_POPCNT);
+      which_struct = __cpu_features_var;
+      break;
+    case BUILT_IN_TARGET_SUPPORTS_SSE:
+      field = get_field_from_struct (__processor_features_type, F_SSE);
+      which_struct = __cpu_features_var;
+      break;
+    case BUILT_IN_TARGET_SUPPORTS_SSE2:
+      field = get_field_from_struct (__processor_features_type, F_SSE2);
+      which_struct = __cpu_features_var;
+      break;
+    case BUILT_IN_TARGET_SUPPORTS_SSE3:
+      field = get_field_from_struct (__processor_features_type, F_SSE3);
+      which_struct = __cpu_features_var;
+      break;
+    case BUILT_IN_TARGET_SUPPORTS_SSSE3:
+      /* SSSE3 has its own bit; it must not read the SSE3 one.  */
+      field = get_field_from_struct (__processor_features_type, F_SSSE3);
+      which_struct = __cpu_features_var;
+      break;
+    case BUILT_IN_TARGET_SUPPORTS_SSE4_1:
+      field = get_field_from_struct (__processor_features_type, F_SSE4_1);
+      which_struct = __cpu_features_var;
+      break;
+    case BUILT_IN_TARGET_SUPPORTS_SSE4_2:
+      field = get_field_from_struct (__processor_features_type, F_SSE4_2);
+      which_struct = __cpu_features_var;
+      break;
+    case BUILT_IN_TARGET_IS_AMD:
+      field = get_field_from_struct (__processor_model_type, M_AMD);
+      which_struct = __cpu_model_var;
+      break;
+    case BUILT_IN_TARGET_IS_INTEL:
+      field = get_field_from_struct (__processor_model_type, M_INTEL);
+      which_struct = __cpu_model_var;
+      break;
+    case BUILT_IN_TARGET_IS_COREI7_NEHALEM:
+      field = get_field_from_struct (__processor_model_type,
+                                     M_COREI7_NEHALEM);
+      which_struct = __cpu_model_var;
+      break;
+    case BUILT_IN_TARGET_IS_COREI7_WESTMERE:
+      field = get_field_from_struct (__processor_model_type,
+                                     M_COREI7_WESTMERE);
+      which_struct = __cpu_model_var;
+      break;
+    case BUILT_IN_TARGET_IS_COREI7_SANDYBRIDGE:
+      field = get_field_from_struct (__processor_model_type,
+                                     M_COREI7_SANDYBRIDGE);
+      which_struct = __cpu_model_var;
+      break;
+    case BUILT_IN_TARGET_IS_AMDFAM10_BARCELONA:
+      field = get_field_from_struct (__processor_model_type,
+                                     M_AMDFAM10_BARCELONA);
+      which_struct = __cpu_model_var;
+      break;
+    case BUILT_IN_TARGET_IS_AMDFAM10_SHANGHAI:
+      field = get_field_from_struct (__processor_model_type,
+                                     M_AMDFAM10_SHANGHAI);
+      which_struct = __cpu_model_var;
+      break;
+    case BUILT_IN_TARGET_IS_AMDFAM10_ISTANBUL:
+      field = get_field_from_struct (__processor_model_type,
+                                     M_AMDFAM10_ISTANBUL);
+      which_struct = __cpu_model_var;
+      break;
+    default:
+      return NULL_TREE;
+    }
+
+  /* Reference the chosen bit-field of the chosen variable.  */
+  return build3 (COMPONENT_REF, TREE_TYPE (field), which_struct, field,
+                 NULL_TREE);
+}
+
+/* Target hook for folding builtins.  Hands FNDECL to fold_builtin_target
+   when it is a normal-class builtin whose name contains
+   "__builtin_target"; returns NULL_TREE for everything else.  The other
+   arguments are unused.  */
+
+static tree
+ix86_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
+                   tree *args ATTRIBUTE_UNUSED, bool ignore ATTRIBUTE_UNUSED)
+{
+  const char *decl_name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
+
+  if (DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
+    return NULL_TREE;
+
+  if (strstr(decl_name, "__builtin_target") == NULL)
+    return NULL_TREE;
+
+  return fold_builtin_target (fndecl);
+}
+
/* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
static void
@@ -35097,6 +35431,9 @@ ix86_autovectorize_vector_sizes (void)
#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
+#undef TARGET_FOLD_BUILTIN
+#define TARGET_FOLD_BUILTIN ix86_fold_builtin
+
#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
--
This patch is available for review at http://codereview.appspot.com/4893046