This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [aarch64]: added variable issue rate feature for falkor


Hi,

I reworked the patch to use a tuning flag instead of checking explicitly
for the CPU flavor. I will soon send an update that no longer uses the
static variable and uses the SCHED API instead.

First, I would like to get some comments on the current version.

Regards,
Kai
       Jim Wilson <jim.wilson@linaro.org>
        Kai Tietz <kai.tietz@linaro.org>

        * config/aarch64/aarch64.c (aarch64_sched_reorder): Implement
        TARGET_SCHED_REORDER hook.
        (aarch64_variable_issue): Implement TARGET_SCHED_VARIABLE_ISSUE
        hook.
	(qdf24xx_): Add AARCH64_EXTRA_TUNE_SCHED_MICRO_OPS tune flag.
        (TARGET_SCHED_REORDER): Define.
        (TARGET_SCHED_VARIABLE_ISSUE): Likewise.
        * config/aarch64/falkor.md (falkor_variable_issue): New.
	* config/aarch64/aarch64-tuning-flags.def (SCHED_MICRO_OPS): New flag.

Index: trunk/gcc/config/aarch64/aarch64.c
===================================================================
--- trunk.orig/gcc/config/aarch64/aarch64.c
+++ trunk/gcc/config/aarch64/aarch64.c
@@ -955,7 +955,7 @@ static const struct tune_params qdf24xx_
   &generic_branch_cost,
   &generic_approx_modes,
   4, /* memmov_cost  */
-  4, /* issue_rate  */
+  8, /* issue_rate  */
   (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
    | AARCH64_FUSE_MOVK_MOVK), /* fuseable_ops  */
   "16",	/* function_align.  */
@@ -968,7 +968,7 @@ static const struct tune_params qdf24xx_
   2,	/* min_div_recip_mul_df.  */
   0,	/* max_case_values.  */
   tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
-  AARCH64_EXTRA_TUNE_RENAME_LOAD_REGS, /* tune_flags.  */
+  AARCH64_EXTRA_TUNE_RENAME_LOAD_REGS | AARCH64_EXTRA_TUNE_SCHED_MICRO_OPS, /* tune_flags.  */
   &qdf24xx_prefetch_tune
 };
 
@@ -18037,6 +18037,109 @@ aarch64_run_selftests (void)
 
 #endif /* #if CHECKING_P */
 
+/* The number of micro-ops left over after issuing the last instruction in a
+   cycle.  It is charged against the next cycle's issue slots before any insns
+   are issued, and is reset to 0 at the start of every basic block.  */
+static int leftover_uops = 0;
+
+/* Implement TARGET_SCHED_REORDER.  Returns this cycle's free issue slots.  */
+
+static int
+aarch64_sched_reorder (FILE *file, int verbose,
+		       rtx_insn **ready ATTRIBUTE_UNUSED,
+		       int *n_readyp ATTRIBUTE_UNUSED,
+		       int clock)
+{
+  int can_issue_more = aarch64_sched_issue_rate ();
+
+  if ((aarch64_tune_params.extra_tuning_flags
+       & AARCH64_EXTRA_TUNE_SCHED_MICRO_OPS) != 0)
+    {
+      /* Clock 0 is the start of a basic block: discard carried-over uops.  */
+      if (clock == 0)
+	{
+	  if (leftover_uops && file && (verbose > 3))
+	    fprintf (file, ";;\tLeftover uops ignored at bb start.\n");
+
+	  leftover_uops = 0;
+	}
+
+      /* Charge the uops carried over from the previous cycle against this
+	 cycle's issue slots.  The carry can exceed a whole cycle's slots, in
+	 which case nothing issues now and the excess is carried over again.  */
+      else if (leftover_uops)
+	{
+	  can_issue_more -= leftover_uops;
+
+	  if (file && (verbose > 3))
+	    {
+	      fprintf (file, ";;\tUse %d issue slots for leftover uops.\n",
+		       leftover_uops);
+	      fprintf (file, ";;\t%d issue slots left.\n", can_issue_more);
+	    }
+
+	  leftover_uops = 0;
+
+	  if (can_issue_more < 0)
+	    {
+	      leftover_uops = 0 - can_issue_more;
+	      can_issue_more = 0;
+
+	      if (file && (verbose > 3))
+		{
+		  fprintf (file, ";;skipping issue cycle.\n");
+		  fprintf (file, ";;\t%d uops left over.\n", leftover_uops);
+		}
+	    }
+	}
+    }
+
+  return can_issue_more;
+}
+
+/* Implement TARGET_SCHED_VARIABLE_ISSUE.  USE and CLOBBER take no slots.  */
+
+static int
+aarch64_variable_issue (FILE *file, int verbose,
+			rtx_insn *insn, int more)
+{
+  if (GET_CODE (PATTERN (insn)) != USE
+      && GET_CODE (PATTERN (insn)) != CLOBBER)
+    {
+      if ((aarch64_tune_params.extra_tuning_flags
+	   & AARCH64_EXTRA_TUNE_SCHED_MICRO_OPS) == 0)
+	more -= 1;
+      else
+	{
+          /* Falkor is currently the only target that sets the micro-op
+	     scheduling flag, so its attribute can be used unconditionally.  */
+	  int issue_slots = get_attr_falkor_variable_issue (insn);
+	  more -= issue_slots;
+
+	  if (file && (verbose > 3))
+	    {
+	      fprintf (file, ";;\tInsn takes %d issue slots.\n", issue_slots);
+	      fprintf (file, ";;\t%d issue slots left.\n", more);
+	    }
+
+	  /* An insn is issued before its issue slots are subtracted, so the
+	     count can go negative.  Clamp it to 0 and carry the deficit over
+	     to the next cycle via leftover_uops.  */
+
+	  if (more < 0)
+	    {
+	      leftover_uops = 0 - more;
+	      more = 0;
+
+	      if (file && (verbose > 3))
+		fprintf (file, ";;\t%d uops left over.\n", leftover_uops);
+	    }
+	}
+    }
+
+  return more;
+}
+
 #undef TARGET_ADDRESS_COST
 #define TARGET_ADDRESS_COST aarch64_address_cost
 
@@ -18265,6 +18368,12 @@ aarch64_libgcc_floating_mode_supported_p
 #undef TARGET_SCHED_ISSUE_RATE
 #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
 
+#undef TARGET_SCHED_REORDER
+#define TARGET_SCHED_REORDER aarch64_sched_reorder
+
+#undef TARGET_SCHED_VARIABLE_ISSUE
+#define TARGET_SCHED_VARIABLE_ISSUE aarch64_variable_issue
+
 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
   aarch64_sched_first_cycle_multipass_dfa_lookahead
Index: trunk/gcc/config/aarch64/falkor.md
===================================================================
--- trunk.orig/gcc/config/aarch64/falkor.md
+++ trunk/gcc/config/aarch64/falkor.md
@@ -685,3 +685,24 @@
 (define_bypass 3
   "falkor_afp_5_vxvy_mul,falkor_afp_5_vxvy_mla,falkor_afp_5_vxvy_vxvy_mul,falkor_afp_5_vxvy_vxvy_mla,falkor_afp_6_vxvy_mul,falkor_afp_6_vxvy_mla,falkor_afp_6_vxvy_vxvy_mul,falkor_afp_6_vxvy_vxvy_mla,falkor_fpdt_5_vxvy_mul,falkor_fpdt_5_vxvy_mla,falkor_fpdt_6_vxvy_mul,falkor_fpdt_6_vxvy_mla"
   "falkor_afp_5_vxvy_mla,falkor_afp_5_vxvy_vxvy_mla,falkor_afp_6_vxvy_mla,falkor_afp_6_vxvy_vxvy_mla,falkor_fpdt_5_vxvy_mla,falkor_fpdt_6_vxvy_mla")
+
+
+(define_attr "falkor_variable_issue" ""
+  (cond [
+;; Issue slots taken by each A64 instruction type (default is 1).
+	 (eq_attr "type" "neon_fp_neg_s_q,neon_fp_neg_d_q,neon_fp_abs_s_q,neon_fp_abs_d_q,neon_fp_minmax_s_q,neon_fp_minmax_d_q,neon_fp_compare_s_q,neon_fp_compare_d_q,neon_fp_round_s_q,neon_fp_round_d_q,neon_fp_abd_s_q,neon_fp_abd_d_q,neon_fp_addsub_s_q,neon_fp_addsub_d_q,neon_fp_reduc_add_s_q,neon_fp_reduc_add_d_q,neon_fp_to_int_s_q,neon_fp_to_int_d_q,neon_int_to_fp_s_q,neon_int_to_fp_d_q,neon_fp_mla_d_q,neon_fp_mla_d_scalar_q,neon_fp_div_s,neon_fp_div_d,neon_fp_sqrt_s,neon_fp_sqrt_d,neon_shift_imm_long,neon_add_q,neon_reduc_add_q,neon_logic_q,neon_neg_q,neon_sub_q,neon_add_halve_q,neon_sub_halve_q,neon_shift_imm_q,neon_shift_reg_q,neon_minmax_q,neon_abs_q,neon_compare_q,neon_compare_zero_q,neon_tst_q,neon_reduc_add_long,neon_shift_acc_q,neon_reduc_add_acc_q,neon_abd_q,neon_abd_long,neon_qadd_q,neon_qsub_q,neon_qabs_q,neon_qneg_q,neon_sat_shift_imm_q,neon_sat_shift_reg_q,neon_mul_b_q,neon_mul_h_q,neon_mul_s_q,neon_mul_h_scalar_q,neon_mul_s_scalar_q,neon_sat_mul_b_q,neon_sat_mul_h_q,neon_sat_mul_s_q,neon_mul_b_long,neon_mul_h_long,neon_mul_s_long,neon_mul_d_long,neon_mul_h_scalar_long,neon_mul_s_scalar_long,neon_sat_mul_b_long,neon_sat_mul_h_long,neon_sat_mul_s_long,neon_sat_mul_h_scalar_q,neon_sat_mul_s_scalar_q,neon_sat_mul_h_scalar_long,neon_sat_mul_s_scalar_long,neon_mla_b_q,neon_mla_h_q,neon_mla_s_q,neon_mla_h_scalar_q,neon_mla_s_scalar_q,neon_mla_b_long,neon_mla_h_long,neon_mla_s_long,neon_mla_h_scalar_long,neon_mla_s_scalar_long,neon_sat_mla_b_long,neon_sat_mla_h_long,neon_sat_mla_s_long,neon_sat_mla_h_scalar_long,neon_sat_mla_s_scalar_long,neon_add_halve_narrow_q,neon_sub_halve_narrow_q,neon_arith_acc,neon_load1_2reg,neon_load2_2reg,neon_load2_all_lanes,neon_load1_2reg_q,neon_load2_2reg_q,neon_load2_all_lanes_q,neon_load3_one_lane,neon_load4_one_lane,neon_ldp,neon_ldp_q,neon_from_gp_q,neon_bsl_q,neon_dup_q,neon_ext_q,neon_move_q,neon_rev_q,neon_tbl1_q,neon_permute_q,neon_cls_q,neon_cnt_q,neon_rbit_q,neon_tbl2,neon_fp_recpe_s_q,neon_fp_recpe_d_q,neon_fp_rsqrte_s_q
,neon_fp_rsqrte_d_q,neon_fp_recps_s_q,neon_fp_recps_d_q,neon_fp_rsqrts_d_q,neon_store1_1reg,neon_store1_1reg_q,neon_store1_one_lane,neon_store1_one_lane_q,neon_store1_2reg,neon_store2_2reg,neon_store2_one_lane,neon_store2_one_lane_q,neon_stp,crypto_sha1_xor,crypto_sha256_fast,crypto_sha1_slow,crypto_sha256_slow,crypto_aese,f_stores,f_stored,fdivs,fdivd,sdiv,udiv")
+	   (const_int 2)
+	 (eq_attr "type" "neon_fp_cvt_narrow_s_q,neon_fp_cvt_narrow_d_q,neon_load1_3reg,neon_load3_3reg,neon_load3_all_lanes,neon_load1_3reg_q,neon_load3_3reg_q,neon_load3_all_lanes_q,neon_tbl2_q,neon_tbl3")
+	   (const_int 3)
+	 (eq_attr "type" "neon_fp_div_s_q,neon_fp_div_d_q,neon_fp_sqrt_s_q,neon_fp_sqrt_d_q,neon_add_widen,neon_sub_widen,neon_arith_acc_q,neon_load1_4reg,neon_load4_4reg,neon_load1_4reg_q,neon_load4_4reg_q,neon_load4_all_lanes,neon_load4_all_lanes_q,neon_tbl3_q,neon_tbl4,neon_store1_2reg_q,neon_store1_3reg,neon_store1_4reg,neon_store2_2reg_q,neon_store3_3reg,neon_store4_4reg,neon_store3_one_lane,neon_store3_one_lane_q,neon_store4_one_lane,neon_store4_one_lane_q,neon_stp_q")
+	   (const_int 4)
+	 (eq_attr "type" "neon_tbl4_q")
+	   (const_int 5)
+	 (eq_attr "type" "neon_store1_3reg_q,neon_store3_3reg_q")
+	   (const_int 6)
+	 (eq_attr "type" "neon_store1_4reg_q,neon_store4_4reg_q")
+	   (const_int 8)
+	 (eq_attr "type" "multiple")
+	   (const_int 2)
+	]
+	(const_int 1)))
Index: trunk/gcc/config/aarch64/aarch64-tuning-flags.def
===================================================================
--- trunk.orig/gcc/config/aarch64/aarch64-tuning-flags.def
+++ trunk/gcc/config/aarch64/aarch64-tuning-flags.def
@@ -46,4 +46,7 @@ AARCH64_EXTRA_TUNING_OPTION ("no_ldp_stp
 
 AARCH64_EXTRA_TUNING_OPTION ("rename_load_regs", RENAME_LOAD_REGS)
 
+/* Enable micro-op scheduling instead of the default instruction scheduling.  */
+AARCH64_EXTRA_TUNING_OPTION ("sched_microops", SCHED_MICRO_OPS)
+
 #undef AARCH64_EXTRA_TUNING_OPTION

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]