This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: (R5900) Implementing Vector Support


On 05/11/2016 04:54 AM, Woon yung Liu wrote:
I saw that the EE has the PMFHL.LH instruction, which loads the HI/LO
register pairs (containing the multiplication result) into a single destination
(i.e. truncates the multiplication result in the process), with the right order
too.  I suppose that it would be suitable for implementing the mulm3 operation.
But if I implement mulm3, is there still a need to implement the
vec_widen_smult_hi_m and vec_widen_smult_lo_m patterns, too?

Of course.  They're used for different things.  E.g.

  /* The destination elements are int while both sources are short, so
     each stored product is wider than its inputs.  */
  int out[100];
  short in1[100], in2[100];

  /* Element-wise short * short product stored into an int element
     (i is assumed to be declared elsewhere in this fragment).  */
  for (i = 0; i < 100; ++i)
    out[i] = in1[i] * in2[i];

will use the vec_widen_smult* patterns.

I tried to implement the two patterns (vec_widen_smult_hi_m and
vec_widen_smult_lo_m), but GCC wouldn't compile due to both patterns having
the same operands. Must they be expands? If so, what sort of patterns should
the pcpyld and pcpyud instructions be? If I don't declare them differently,
I'll have the same compilation error again (due to them having the same
operands).

Yes, I would think they should be expands.  I would expect something like

;; Widening signed multiply of two V8HI vectors.
;;
;; Operands 1 and 2 (V8HI, constraint "d") are each sign-extended to V8SI
;; and multiplied.  The eight 32-bit products are stored in operand 0 in
;; the lane order given by the parallel below: 0 1 4 5 2 3 6 7.
;;
;; Operand 0 uses constraint "x" -- presumably the HI/LO register pair
;; treated as a single V8SI value; confirm against the port's constraint
;; definitions.
;;
;; Operand 3 is a V4SI scratch in a "d" register.  The assembler template
;; names it as the instruction's explicit destination, but the RTL only
;; models it as clobbered.
;;
;; The insn condition "..." is a placeholder in this sketch.
;;
;; ??? Could describe the result in %3, if we ever find it useful.
(define_insn "pmulth_ee"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
	(vec_select:V8SI
	  (mult:V8SI
	    (sign_extend:V8SI (match_operand:V8HI 1 "register_operand" "d"))
	    (sign_extend:V8SI (match_operand:V8HI 2 "register_operand" "d")))
	  (parallel
	    [(const_int 0) (const_int 1) (const_int 4) (const_int 5)
	     (const_int 2) (const_int 3) (const_int 6) (const_int 7)])))
    (clobber (match_scratch:V4SI 3 "=d"))]
  "..."
  "pmulth\t%3,%1,%2"
)

;; Gather eight halfwords out of a sixteen-halfword source.
;;
;; Operand 1 (V16HI, constraint "x") is the source; operand 0 (V8HI,
;; constraint "d") receives source elements 0 2 8 10 4 6 12 14, in that
;; order.  Only even-numbered halfwords are selected -- presumably the low
;; half of each 32-bit value held in the source, assuming little-endian
;; lane numbering; confirm against the target.
;;
;; Only %0 appears in the assembler template; the source register is
;; implicit in the instruction.
;;
;; The insn condition "..." is a placeholder in this sketch.
(define_insn "pmfhl_lh_ee_v8hi"
  [(set (match_operand:V8HI 0 "register_operand" "=d")
	(vec_select:V8HI
	  (match_operand:V16HI 1 "register_operand" "x")
	  (parallel
	    [(const_int 0) (const_int 2)
	     (const_int 8) (const_int 10)
	     (const_int 4) (const_int 6)
	     (const_int 12) (const_int 14)])))]
  "..."
  "pmfhl.lh\t%0"
)

;; Copy elements 2 and 3 of the V4DI operand 1 (constraint "x") into the
;; V2DI operand 0 (constraint "d").  Going by the mnemonic, these two
;; elements are presumably the HI half of the HI/LO pair -- confirm.
;;
;; Only %0 appears in the assembler template; the source register is
;; implicit in the instruction.  The insn condition "..." is a placeholder.
;;
;; ??? Maybe provide V4SI and V8HI versions too.
(define_insn "pmfhi_ee_v2di"
  [(set (match_operand:V2DI 0 "register_operand" "=d")
	(vec_select:V2DI
	  (match_operand:V4DI 1 "register_operand" "x")
	  (parallel [(const_int 2) (const_int 3)])))]
  "..."
  "pmfhi\t%0"
)

;; Copy elements 0 and 1 of the V4DI operand 1 (constraint "x") into the
;; V2DI operand 0 (constraint "d").  Going by the mnemonic, these two
;; elements are presumably the LO half of the HI/LO pair -- confirm.
;;
;; Only %0 appears in the assembler template; the source register is
;; implicit in the instruction.  The insn condition "..." is a placeholder.
;;
;; ??? Maybe provide V4SI and V8HI versions too.
(define_insn "pmflo_ee_v2di"
  [(set (match_operand:V2DI 0 "register_operand" "=d")
	(vec_select:V2DI
	  (match_operand:V4DI 1 "register_operand" "x")
	  (parallel [(const_int 0) (const_int 1)])))]
  "..."
  "pmflo\t%0"
)

;; Combine element 0 of each of two V2DI inputs.
;;
;; Operands 1 and 2 are concatenated into a V4DI (operand 1 supplies
;; elements 0-1, operand 2 supplies elements 2-3).  Elements 0 and 2 are
;; then selected, so the result is { operand1[0], operand2[0] }.
;;
;; Note that the assembler template lists the sources as %2,%1, i.e. in
;; the opposite order from the RTL operand numbering.
;;
;; The insn condition "..." is a placeholder in this sketch.
;;
;; ??? Maybe provide V4SI and V8HI versions too.
(define_insn "pcpyld_ee_v2di"
  [(set (match_operand:V2DI 0 "register_operand" "=d")
        (vec_select:V2DI
	  (vec_concat:V4DI
	    (match_operand:V2DI 1 "register_operand" "d")
	    (match_operand:V2DI 2 "register_operand" "d"))
	  (parallel [(const_int 0) (const_int 2)])))]
  "..."
  "pcpyld\t%0,%2,%1"
)

;; Combine element 1 of each of two V2DI inputs.
;;
;; Operands 1 and 2 are concatenated into a V4DI (operand 1 supplies
;; elements 0-1, operand 2 supplies elements 2-3).  Elements 1 and 3 are
;; then selected, so the result is { operand1[1], operand2[1] }.
;;
;; The assembler template lists the sources as %1,%2, matching the RTL
;; operand numbering.
;;
;; The insn condition "..." is a placeholder in this sketch.
;;
;; ??? Maybe provide V4SI and V8HI versions too.
(define_insn "pcpyud_ee_v2di"
  [(set (match_operand:V2DI 0 "register_operand" "=d")
        (vec_select:V2DI
	  (vec_concat:V4DI
	    (match_operand:V2DI 1 "register_operand" "d")
	    (match_operand:V2DI 2 "register_operand" "d"))
	  (parallel [(const_int 1) (const_int 3)])))]
  "..."
  "pcpyud\t%0,%1,%2"
)

;; V8HI multiply (non-widening): operand 0 = operand 1 * operand 2.
;; Expanded as a widening multiply via pmulth_ee, followed by
;; pmfhl_lh_ee_v8hi to pick eight halfwords back out of the result.
;; The expander condition "..." is a placeholder in this sketch.
(define_expand "mulv8hi3"
  [(match_operand:V8HI 0 "register_operand")
   (match_operand:V8HI 1 "register_operand")
   (match_operand:V8HI 2 "register_operand")]
  "..."
{
  /* The full-width products go into a fresh V8SI pseudo.  */
  rtx product = gen_reg_rtx (V8SImode);
  emit_insn (gen_pmulth_ee (product, operands[1], operands[2]));

  /* View the same pseudo as sixteen halfwords, which is the source
     mode pmfhl_lh_ee_v8hi expects.  */
  rtx halfwords = gen_lowpart (V16HImode, product);
  emit_insn (gen_pmfhl_lh_ee_v8hi (operands[0], halfwords));

  DONE;
})

;; Signed widening multiply, low half: multiply the low four halfword
;; elements of operands 1 and 2 and put the four 32-bit products in
;; operand 0.
;;
;; The mode suffix of a vec_widen_smult_lo_<m> pattern names the mode of
;; the *input* vectors.  The inputs here are V8HI and the result is V4SI,
;; so the pattern must be named ..._v8hi; under a v8qi name it would be
;; registered for the wrong mode and never used for short * short -> int.
;;
;; Sequence (lane numbers assume little-endian lane numbering -- confirm):
;;   pmulth_ee  leaves the products in lane order 0 1 4 5 2 3 6 7;
;;   viewed as V4DI that is { p0p1, p4p5, p2p3, p6p7 };
;;   pmflo      copies V4DI elements 0-1  -> lo = { p0p1, p4p5 };
;;   pmfhi      copies V4DI elements 2-3  -> hi = { p2p3, p6p7 };
;;   pcpyld     takes element 0 of each   -> { p0p1, p2p3 }.
;;
;; The expander condition "..." is a placeholder in this sketch.
(define_expand "vec_widen_smult_lo_v8hi"
  [(match_operand:V4SI 0 "register_operand")
   (match_operand:V8HI 1 "register_operand")
   (match_operand:V8HI 2 "register_operand")]
  "..."
{
  rtx hilo = gen_reg_rtx (V8SImode);
  rtx hi = gen_reg_rtx (V2DImode);
  rtx lo = gen_reg_rtx (V2DImode);

  /* Compute all eight products, then reinterpret the result as four
     doublewords so the pmfhi/pmflo patterns can split it.  */
  emit_insn (gen_pmulth_ee (hilo, operands[1], operands[2]));
  hilo = gen_lowpart (V4DImode, hilo);
  emit_insn (gen_pmfhi_ee_v2di (hi, hilo));
  emit_insn (gen_pmflo_ee_v2di (lo, hilo));

  /* Element 0 of lo and element 0 of hi together hold products 0..3.  */
  emit_insn (gen_pcpyld_ee_v2di (gen_lowpart (V2DImode, operands[0]), lo, hi));
  DONE;
})

;; Signed widening multiply, high half: multiply the high four halfword
;; elements of operands 1 and 2 and put the four 32-bit products in
;; operand 0.
;;
;; The mode suffix of a vec_widen_smult_hi_<m> pattern names the mode of
;; the *input* vectors.  The inputs here are V8HI and the result is V4SI,
;; so the pattern must be named ..._v8hi; under a v8qi name it would be
;; registered for the wrong mode and never used for short * short -> int.
;;
;; Sequence (lane numbers assume little-endian lane numbering -- confirm):
;;   pmulth_ee  leaves the products in lane order 0 1 4 5 2 3 6 7;
;;   viewed as V4DI that is { p0p1, p4p5, p2p3, p6p7 };
;;   pmflo      copies V4DI elements 0-1  -> lo = { p0p1, p4p5 };
;;   pmfhi      copies V4DI elements 2-3  -> hi = { p2p3, p6p7 };
;;   pcpyud     takes element 1 of each   -> { p4p5, p6p7 }.
;;
;; The expander condition "..." is a placeholder in this sketch.
(define_expand "vec_widen_smult_hi_v8hi"
  [(match_operand:V4SI 0 "register_operand")
   (match_operand:V8HI 1 "register_operand")
   (match_operand:V8HI 2 "register_operand")]
  "..."
{
  rtx hilo = gen_reg_rtx (V8SImode);
  rtx hi = gen_reg_rtx (V2DImode);
  rtx lo = gen_reg_rtx (V2DImode);

  /* Compute all eight products, then reinterpret the result as four
     doublewords so the pmfhi/pmflo patterns can split it.  */
  emit_insn (gen_pmulth_ee (hilo, operands[1], operands[2]));
  hilo = gen_lowpart (V4DImode, hilo);
  emit_insn (gen_pmfhi_ee_v2di (hi, hilo));
  emit_insn (gen_pmflo_ee_v2di (lo, hilo));

  /* Element 1 of lo and element 1 of hi together hold products 4..7.  */
  emit_insn (gen_pcpyud_ee_v2di (gen_lowpart (V2DImode, operands[0]), lo, hi));
  DONE;
})



r~


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]