This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
Re: (R5900) Implementing Vector Support
- From: Richard Henderson <rth at redhat dot com>
- To: Woon yung Liu <ysai187 at yahoo dot com>, Gcc Mailing List <gcc at gcc dot gnu dot org>
- Date: Wed, 11 May 2016 08:50:43 -1000
- Subject: Re: (R5900) Implementing Vector Support
- Authentication-results: sourceware.org; auth=none
- References: <23a57920-3e9e-05f9-e428-a7e5e89d4de9 at redhat dot com> <133091800 dot 362759 dot 1462978450117 dot JavaMail dot yahoo at mail dot yahoo dot com>
On 05/11/2016 04:54 AM, Woon yung Liu wrote:
I saw that the EE has the PMFHL.LH instruction, which loads the HI/LO
register pairs (containing the multiplication result) into a single destination
(i.e. truncates the multiplication result in the process), with the right order
too. I suppose that it would be suitable for implementing the mulm3 operation.
But if I implement mulm3, is there still a need to implement the
vec_widen_smult_hi_m and vec_widen_smult_lo_m patterns?
Of course. They're used for different things. E.g.
/* The inputs are short but the result array is int, so every product
   is stored in a wider element than its operands.  */
int out[100];
short in1[100], in2[100];
for (i = 0; i < 100; ++i)
out[i] = in1[i] * in2[i];
will use the vec_widen_smult* patterns.
I tried to implement the two patterns (vec_widen_smult_hi_m and
vec_widen_smult_lo_m), but GCC wouldn't compile due to both patterns having
the same operands. Must they be expands? If so, what sort of patterns should
the pcpyld and pcpyud instructions be? If I don't declare them differently,
I'll have the same compilation error again (due to them having the same
operands).
Yes, I would think they should be expands. I would expect something like:
;; Widening multiply of the eight halfword lanes of operands 1 and 2.
;; Both inputs are sign-extended to V8SI, multiplied, and the eight
;; products are stored in operand 0 in the lane order 0 1 4 5 2 3 6 7
;; given by the vec_select.  Operand 0 uses constraint "x" (presumably
;; the HI/LO register pair -- confirm against the port's constraints).
;; The assembly template also names a destination register, %3, which
;; is modelled here only as a clobbered V4SI scratch with constraint "d".
;; ??? Could describe the result in %3, if we ever find it useful.
(define_insn "pmulth_ee"
[(set (match_operand:V8SI 0 "register_operand" "=x")
(vec_select:V8SI
(mult:V8SI
(sign_extend:V8SI (match_operand:V8HI 1 "register_operand" "d"))
(sign_extend:V8SI (match_operand:V8HI 2 "register_operand" "d")))
(parallel
[(const_int 0) (const_int 1) (const_int 4) (const_int 5)
(const_int 2) (const_int 3) (const_int 6) (const_int 7)])))
(clobber (match_scratch:V4SI 3 "=d"))]
"..."
"pmulth\t%3,%1,%2"
)
;; Gather eight halfwords from operand 1, viewed as sixteen halfwords
;; (V16HI, constraint "x"), into operand 0.  The elements taken are
;; 0 2 8 10 4 6 12 14, in that order.  Only the destination appears in
;; the assembly template; the source is implicit in the instruction.
;; mulv8hi3 uses this to read back the truncated products of pmulth_ee.
(define_insn "pmfhl_lh_ee_v8hi"
[(set (match_operand:V8HI 0 "register_operand" "=d")
(vec_select:V8HI
(match_operand:V16HI 1 "register_operand" "x")
(parallel
[(const_int 0) (const_int 2)
(const_int 8) (const_int 10)
(const_int 4) (const_int 6)
(const_int 12) (const_int 14)])))]
"..."
"pmfhl.lh\t%0"
)
;; Copy elements 2 and 3 of operand 1, viewed as four doublewords
;; (V4DI, constraint "x"), into the V2DI operand 0.  Only the
;; destination appears in the assembly template.
;; ??? Maybe provide V4SI and V8HI versions too.
(define_insn "pmfhi_ee_v2di"
[(set (match_operand:V2DI 0 "register_operand" "=d")
(vec_select:V2DI
(match_operand:V4DI 1 "register_operand" "x")
(parallel [(const_int 2) (const_int 3)])))]
"..."
"pmfhi\t%0"
)
;; Copy elements 0 and 1 of operand 1, viewed as four doublewords
;; (V4DI, constraint "x"), into the V2DI operand 0.  Only the
;; destination appears in the assembly template.
;; ??? Maybe provide V4SI and V8HI versions too.
(define_insn "pmflo_ee_v2di"
[(set (match_operand:V2DI 0 "register_operand" "=d")
(vec_select:V2DI
(match_operand:V4DI 1 "register_operand" "x")
(parallel [(const_int 0) (const_int 1)])))]
"..."
"pmflo\t%0"
)
;; Build operand 0 from element 0 of operand 1 followed by element 0
;; of operand 2 (indices 0 and 2 of their concatenation).
;; Note that the assembly template lists the sources as %2,%1, i.e. in
;; the opposite order to the RTL operands.
;; ??? Maybe provide V4SI and V8HI versions too.
(define_insn "pcpyld_ee_v2di"
[(set (match_operand:V2DI 0 "register_operand" "=d")
(vec_select:V2DI
(vec_concat:V4DI
(match_operand:V2DI 1 "register_operand" "d")
(match_operand:V2DI 2 "register_operand" "d"))
(parallel [(const_int 0) (const_int 2)])))]
"..."
"pcpyld\t%0,%2,%1"
)
;; Build operand 0 from element 1 of operand 1 followed by element 1
;; of operand 2 (indices 1 and 3 of their concatenation).
;; Here the assembly template lists the sources as %1,%2, in the same
;; order as the RTL operands.
;; ??? Maybe provide V4SI and V8HI versions too.
(define_insn "pcpyud_ee_v2di"
[(set (match_operand:V2DI 0 "register_operand" "=d")
(vec_select:V2DI
(vec_concat:V4DI
(match_operand:V2DI 1 "register_operand" "d")
(match_operand:V2DI 2 "register_operand" "d"))
(parallel [(const_int 1) (const_int 3)])))]
"..."
"pcpyud\t%0,%1,%2"
)
;; Non-widening V8HI multiply: operand 0 = operand 1 * operand 2, lane
;; by lane.  Expanded as a pmulth_ee into a fresh V8SI temporary
;; followed by pmfhl_lh_ee_v8hi, which picks one halfword out of each
;; product.
(define_expand "mulv8hi3"
  [(match_operand:V8HI 0 "register_operand")
   (match_operand:V8HI 1 "register_operand")
   (match_operand:V8HI 2 "register_operand")]
  "..."
{
  /* Holds all eight SImode products.  */
  rtx wide_products = gen_reg_rtx (V8SImode);

  emit_insn (gen_pmulth_ee (wide_products, operands[1], operands[2]));

  /* pmfhl_lh_ee_v8hi wants the same register viewed as sixteen
     halfwords.  */
  emit_insn (gen_pmfhl_lh_ee_v8hi (operands[0],
				   gen_lowpart (V16HImode, wide_products)));
  DONE;
})
;; Signed widening multiply, "lo" half: operand 0 (V4SI) receives four
;; of the eight full-width products of the V8HI operands 1 and 2.
;;
;; The mode suffix of a vec_widen_smult_lo_<m> pattern is the mode of
;; the *input* vectors, so this must be named ..._v8hi; under the name
;; ..._v8qi the middle end would never find it for V8HI inputs.
(define_expand "vec_widen_smult_lo_v8hi"
  [(match_operand:V4SI 0 "register_operand")
   (match_operand:V8HI 1 "register_operand")
   (match_operand:V8HI 2 "register_operand")]
  "..."
{
  rtx hilo = gen_reg_rtx (V8SImode);
  rtx hi = gen_reg_rtx (V2DImode);
  rtx lo = gen_reg_rtx (V2DImode);

  /* Compute all eight products; pmulth_ee leaves them in the lane
     order 0 1 4 5 2 3 6 7.  */
  emit_insn (gen_pmulth_ee (hilo, operands[1], operands[2]));

  /* Split the result into doubleword pairs: hi gets V4DI elements
     2-3, lo gets elements 0-1.  */
  hilo = gen_lowpart (V4DImode, hilo);
  emit_insn (gen_pmfhi_ee_v2di (hi, hilo));
  emit_insn (gen_pmflo_ee_v2di (lo, hilo));

  /* Combine element 0 of lo with element 0 of hi.  Given the lane
     order above these should be products 0-3 (assumes little-endian
     lane numbering for the V8SI -> V4DI view -- confirm).  */
  emit_insn (gen_pcpyld_ee_v2di (gen_lowpart (V2DImode, operands[0]), lo, hi));
  DONE;
})
;; Signed widening multiply, "hi" half: operand 0 (V4SI) receives four
;; of the eight full-width products of the V8HI operands 1 and 2.
;;
;; The mode suffix of a vec_widen_smult_hi_<m> pattern is the mode of
;; the *input* vectors, so this must be named ..._v8hi; under the name
;; ..._v8qi the middle end would never find it for V8HI inputs.
(define_expand "vec_widen_smult_hi_v8hi"
  [(match_operand:V4SI 0 "register_operand")
   (match_operand:V8HI 1 "register_operand")
   (match_operand:V8HI 2 "register_operand")]
  "..."
{
  rtx hilo = gen_reg_rtx (V8SImode);
  rtx hi = gen_reg_rtx (V2DImode);
  rtx lo = gen_reg_rtx (V2DImode);

  /* Compute all eight products; pmulth_ee leaves them in the lane
     order 0 1 4 5 2 3 6 7.  */
  emit_insn (gen_pmulth_ee (hilo, operands[1], operands[2]));

  /* Split the result into doubleword pairs: hi gets V4DI elements
     2-3, lo gets elements 0-1.  */
  hilo = gen_lowpart (V4DImode, hilo);
  emit_insn (gen_pmfhi_ee_v2di (hi, hilo));
  emit_insn (gen_pmflo_ee_v2di (lo, hilo));

  /* Combine element 1 of lo with element 1 of hi.  Given the lane
     order above these should be products 4-7 (assumes little-endian
     lane numbering for the V8SI -> V4DI view -- confirm).  */
  emit_insn (gen_pcpyud_ee_v2di (gen_lowpart (V2DImode, operands[0]), lo, hi));
  DONE;
})
r~