From: Uros Bizjak Date: Wed, 8 Nov 2023 20:46:26 +0000 (+0100) Subject: i386: Apply LRA reload workaround to insns with high registers [PR82524] X-Git-Tag: basepoints/gcc-15~4885 X-Git-Url: https://gcc.gnu.org/git/?a=commitdiff_plain;h=dced5ae64703507a7159972316a1dde48e5f7470;p=gcc.git i386: Apply LRA reload workaround to insns with high registers [PR82524] LRA is not able to reload zero_extracted in-out operand with matched input operand in the same way as strict_low_part in-out operand. The patch applies the strict_low_part workaround, where we allow LRA to generate an instruction with non-matched input operand, which is split post reload to the instruction that inserts non-matched input operand to an in-out operand and the instruction that uses matched operand, also to zero_extracted in-out operand case. The generated code from the pr82524.c testcase improves from: movl %esi, %ecx movl %edi, %eax movsbl %ch, %esi addl %esi, %edx movb %dl, %ah to: movl %edi, %eax movl %esi, %ecx movb %ch, %ah addb %dl, %ah The compiler is now also able to handle non-commutative operations: movl %edi, %eax movl %esi, %ecx movb %ch, %ah subb %dl, %ah and unary operations: movl %edi, %eax movl %esi, %edx movb %dh, %ah negb %ah The patch also robustifies split condition of the splitters to ensure that only alternatives with unmatched operands are split. PR target/82524 gcc/ChangeLog: * config/i386/i386.md (*add_1_slp): Split insn only for unmatched operand 0. (*sub_1_slp): Ditto. (*_1_slp): Merge pattern from "*and_1_slp" and "*_1_slp" using any_logic code iterator. Split insn only for unmatched operand 0. (*neg1_slp): Split insn only for unmatched operand 0. (*one_cmpl_1_slp): Ditto. (*ashl3_1_slp): Ditto. (*_1_slp): Ditto. (*_1_slp): Ditto. (*addqi_ext_1): Redefine as define_insn_and_split. Add alternative 1 and split insn after reload for unmatched operand 0. (*qi_ext_2): Merge pattern from "*addqi_ext_2" and "*subqi_ext_2" using plusminus code iterator. 
Redefine as define_insn_and_split. Add alternative 1 and split insn after reload for unmatched operand 0. (*subqi_ext_1): Redefine as define_insn_and_split. Add alternative 1 and split insn after reload for unmatched operand 0. (*qi_ext_0): Merge pattern from "*andqi_ext_0" and "*qi_ext_0" using any_logic code iterator. (*qi_ext_1): Merge pattern from "*andqi_ext_1" and "*qi_ext_1" using any_logic code iterator. Redefine as define_insn_and_split. Add alternative 1 and split insn after reload for unmatched operand 0. (*qi_ext_1_cc): Merge pattern from "*andqi_ext_1_cc" and "*xorqi_ext_1_cc" using any_logic code iterator. Redefine as define_insn_and_split. Add alternative 1 and split insn after reload for unmatched operand 0. (*qi_ext_2): Merge pattern from "*andqi_ext_2" and "*qi_ext_2" using any_logic code iterator. Redefine as define_insn_and_split. Add alternative 1 and split insn after reload for unmatched operand 0. (*qi_ext_3): Redefine as define_insn_and_split. Add alternative 1 and split insn after reload for unmatched operand 0. (*negqi_ext_1): Rename from "*negqi_ext_2". Add alternative 1 and split insn after reload for unmatched operand 0. (*one_cmplqi_ext_1): Ditto. (*ashlqi_ext_1): Ditto. (*qi_ext_1): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/pr78904-1.c (test_sub): New test. * gcc.target/i386/pr78904-1a.c (test_sub): Ditto. * gcc.target/i386/pr78904-1b.c (test_sub): Ditto. * gcc.target/i386/pr78904-2.c (test_sub): Ditto. * gcc.target/i386/pr78904-2a.c (test_sub): Ditto. * gcc.target/i386/pr78904-2b.c (test_sub): Ditto. * gcc.target/i386/pr78952-4.c (test_sub): Ditto. * gcc.target/i386/pr82524.c: New test. * gcc.target/i386/pr82524-1.c: New test. * gcc.target/i386/pr82524-2.c: New test. * gcc.target/i386/pr82524-3.c: New test. 
--- diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 990229903778..ce7102af44fb 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -6596,7 +6596,9 @@ return "add{}\t{%2, %0|%0, %2}"; } } - "&& reload_completed" + "&& reload_completed + && !(rtx_equal_p (operands[0], operands[1]) + || rtx_equal_p (operands[0], operands[2]))" [(set (strict_low_part (match_dup 0)) (match_dup 1)) (parallel [(set (strict_low_part (match_dup 0)) @@ -7001,38 +7003,58 @@ (match_operand:QI 2 "const_int_operand")) 0)) (clobber (reg:CC FLAGS_REG))])]) -(define_insn "*addqi_ext_1" +;; Alternative 1 is needed to work around LRA limitation, see PR82524. +(define_insn_and_split "*addqi_ext_1" [(set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") + (match_operand 0 "int248_register_operand" "+Q,&Q") (const_int 8) (const_int 8)) (subreg:SWI248 (plus:QI (subreg:QI (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "0") + [(match_operand 1 "int248_register_operand" "0,!Q") (const_int 8) (const_int 8)]) 0) - (match_operand:QI 2 "general_operand" "QnBn")) 0)) + (match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0)) (clobber (reg:CC FLAGS_REG))] - "/* FIXME: without this LRA can't reload this pattern, see PR82524. 
*/ - rtx_equal_p (operands[0], operands[1])" + "" { + if (which_alternative) + return "#"; + switch (get_attr_type (insn)) { case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{b}\t%h0"; else - { + { gcc_assert (operands[2] == constm1_rtx); - return "dec{b}\t%h0"; - } + return "dec{b}\t%h0"; + } default: return "add{b}\t{%2, %h0|%h0, %2}"; } } + "reload_completed + && !rtx_equal_p (operands[0], operands[1])" + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (zero_extract:SWI248 + (match_dup 1) (const_int 8) (const_int 8))) + (parallel + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (subreg:SWI248 + (plus:QI + (subreg:QI + (match_op_dup 3 + [(match_dup 0) (const_int 8) (const_int 8)]) 0) + (match_dup 2)) 0)) + (clobber (reg:CC FLAGS_REG))])] + "" [(set_attr "addr" "gpr8") (set (attr "type") (if_then_else (match_operand:QI 2 "incdec_operand") @@ -7040,28 +7062,49 @@ (const_string "alu"))) (set_attr "mode" "QI")]) -(define_insn "*addqi_ext_2" +;; Alternative 1 is needed to work around LRA limitation, see PR82524. +(define_insn_and_split "*qi_ext_2" [(set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") + (match_operand 0 "int248_register_operand" "+Q,&Q") (const_int 8) (const_int 8)) (subreg:SWI248 - (plus:QI + (plusminus:QI (subreg:QI (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "%0") + [(match_operand 1 "int248_register_operand" "0,!Q") (const_int 8) (const_int 8)]) 0) (subreg:QI (match_operator:SWI248 4 "extract_operator" - [(match_operand 2 "int248_register_operand" "Q") + [(match_operand 2 "int248_register_operand" "Q,Q") (const_int 8) (const_int 8)]) 0)) 0)) - (clobber (reg:CC FLAGS_REG))] - "/* FIXME: without this LRA can't reload this pattern, see PR82524. 
*/ - rtx_equal_p (operands[0], operands[1]) - || rtx_equal_p (operands[0], operands[2])" - "add{b}\t{%h2, %h0|%h0, %h2}" + (clobber (reg:CC FLAGS_REG))] + "" + "@ + {b}\t{%h2, %h0|%h0, %h2} + #" + "reload_completed + && !(rtx_equal_p (operands[0], operands[1]) + || ( == PLUS && rtx_equal_p (operands[0], operands[2])))" + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (zero_extract:SWI248 + (match_dup 1) (const_int 8) (const_int 8))) + (parallel + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (subreg:SWI248 + (plusminus:QI + (subreg:QI + (match_op_dup 3 + [(match_dup 0) (const_int 8) (const_int 8)]) 0) + (subreg:QI + (match_op_dup 4 + [(match_dup 2) (const_int 8) (const_int 8)]) 0)) 0)) + (clobber (reg:CC FLAGS_REG))])] + "" [(set_attr "type" "alu") (set_attr "mode" "QI")]) @@ -7570,7 +7613,8 @@ "@ sub{}\t{%2, %0|%0, %2} #" - "&& reload_completed" + "&& reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" [(set (strict_low_part (match_dup 0)) (match_dup 1)) (parallel [(set (strict_low_part (match_dup 0)) @@ -7627,28 +7671,44 @@ (set_attr "type" "alu") (set_attr "mode" "QI")]) -(define_insn "*subqi_ext_2" +;; Alternative 1 is needed to work around LRA limitation, see PR82524. +(define_insn_and_split "*subqi_ext_1" [(set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") + (match_operand 0 "int248_register_operand" "+Q,&Q") (const_int 8) (const_int 8)) (subreg:SWI248 (minus:QI (subreg:QI (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "0") + [(match_operand 1 "int248_register_operand" "0,!Q") (const_int 8) (const_int 8)]) 0) - (subreg:QI - (match_operator:SWI248 4 "extract_operator" - [(match_operand 2 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)]) 0)) 0)) - (clobber (reg:CC FLAGS_REG))] - "/* FIXME: without this LRA can't reload this pattern, see PR82524. 
*/ - rtx_equal_p (operands[0], operands[1])" - "sub{b}\t{%h2, %h0|%h0, %h2}" - [(set_attr "type" "alu") + (match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0)) + (clobber (reg:CC FLAGS_REG))] + "" + "@ + sub{b}\t{%2, %h0|%h0, %2} + #" + "reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (zero_extract:SWI248 + (match_dup 1) (const_int 8) (const_int 8))) + (parallel + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (subreg:SWI248 + (minus:QI + (subreg:QI + (match_op_dup 3 + [(match_dup 0) (const_int 8) (const_int 8)]) 0) + (match_dup 2)) 0)) + (clobber (reg:CC FLAGS_REG))])] + "" + [(set_attr "addr" "gpr8") + (set_attr "type" "alu") (set_attr "mode" "QI")]) ;; Subtract with jump on overflow. @@ -11338,20 +11398,22 @@ (symbol_ref "true")))]) ;; Alternative 1 is needed to work around LRA limitation, see PR82524. -(define_insn_and_split "*and_1_slp" +(define_insn_and_split "*_1_slp" [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+,&")) - (and:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "%0,!") - (match_operand:SWI12 2 "general_operand" "mn,mn"))) + (any_logic:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "%0,!") + (match_operand:SWI12 2 "general_operand" "mn,mn"))) (clobber (reg:CC FLAGS_REG))] "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" "@ - and{}\t{%2, %0|%0, %2} + {}\t{%2, %0|%0, %2} #" - "&& reload_completed" + "&& reload_completed + && !(rtx_equal_p (operands[0], operands[1]) + || rtx_equal_p (operands[0], operands[2]))" [(set (strict_low_part (match_dup 0)) (match_dup 1)) (parallel [(set (strict_low_part (match_dup 0)) - (and:SWI12 (match_dup 0) (match_dup 2))) + (any_logic:SWI12 (match_dup 0) (match_dup 2))) (clobber (reg:CC FLAGS_REG))])] "" [(set_attr "type" "alu") @@ -11528,9 +11590,9 @@ [(set_attr "type" "alu") (set_attr "mode" "")]) -(define_insn "*andqi_ext_0" +(define_insn "*qi_ext_0" 
[(set (match_operand:QI 0 "nonimmediate_operand" "=QBn") - (and:QI + (any_logic:QI (subreg:QI (match_operator:SWI248 3 "extract_operator" [(match_operand 2 "int248_register_operand" "Q") @@ -11539,7 +11601,7 @@ (match_operand:QI 1 "nonimmediate_operand" "0"))) (clobber (reg:CC FLAGS_REG))] "" - "and{b}\t{%h2, %0|%0, %h2}" + "{b}\t{%h2, %0|%0, %h2}" [(set_attr "addr" "gpr8") (set_attr "type" "alu") (set_attr "mode" "QI")]) @@ -11558,86 +11620,180 @@ (match_operand:QI 2 "const_int_operand")) 0)) (clobber (reg:CC FLAGS_REG))])]) -(define_insn "*andqi_ext_1" +;; Alternative 1 is needed to work around LRA limitation, see PR82524. +(define_insn_and_split "*qi_ext_1" [(set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") + (match_operand 0 "int248_register_operand" "+Q,&Q") (const_int 8) (const_int 8)) (subreg:SWI248 - (and:QI + (any_logic:QI (subreg:QI (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "0") + [(match_operand 1 "int248_register_operand" "0,!Q") (const_int 8) (const_int 8)]) 0) - (match_operand:QI 2 "general_operand" "QnBn")) 0)) + (match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0)) (clobber (reg:CC FLAGS_REG))] - "/* FIXME: without this LRA can't reload this pattern, see PR82524. */ - rtx_equal_p (operands[0], operands[1])" - "and{b}\t{%2, %h0|%h0, %2}" + "" + "@ + {b}\t{%2, %h0|%h0, %2} + #" + "reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (zero_extract:SWI248 + (match_dup 1) (const_int 8) (const_int 8))) + (parallel + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (subreg:SWI248 + (any_logic:QI + (subreg:QI + (match_op_dup 3 + [(match_dup 0) (const_int 8) (const_int 8)]) 0) + (match_dup 2)) 0)) + (clobber (reg:CC FLAGS_REG))])] + "" [(set_attr "addr" "gpr8") (set_attr "type" "alu") (set_attr "mode" "QI")]) -;; Generated by peephole translating test to and. 
This shows up -;; often in fp comparisons. -(define_insn "*andqi_ext_1_cc" - [(set (reg FLAGS_REG) - (compare - (and:QI - (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "0") - (const_int 8) - (const_int 8)]) 0) - (match_operand:QI 2 "general_operand" "QnBn")) - (const_int 0))) +;; Alternative 1 is needed to work around LRA limitation, see PR82524. +(define_insn_and_split "*qi_ext_1_cc" + [(set (match_operand 4 "flags_reg_operand") + (match_operator 5 "compare_operator" + [(any_logic:QI + (subreg:QI + (match_operator:SWI248 3 "extract_operator" + [(match_operand 1 "int248_register_operand" "0,!Q") + (const_int 8) + (const_int 8)]) 0) + (match_operand:QI 2 "general_operand" "QnBn,QnBn")) + (const_int 0)])) (set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") + (match_operand 0 "int248_register_operand" "+Q,&Q") (const_int 8) (const_int 8)) (subreg:SWI248 - (and:QI + (any_logic:QI (subreg:QI (match_op_dup 3 - [(match_dup 1) - (const_int 8) - (const_int 8)]) 0) + [(match_dup 0) (const_int 8) (const_int 8)]) 0) (match_dup 2)) 0))] - "ix86_match_ccmode (insn, CCNOmode) - /* FIXME: without this LRA can't reload this pattern, see PR82524. 
*/ - && rtx_equal_p (operands[0], operands[1])" - "and{b}\t{%2, %h0|%h0, %2}" + "ix86_match_ccmode (insn, CCNOmode)" + "@ + {b}\t{%2, %h0|%h0, %2} + #" + "&& reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (zero_extract:SWI248 + (match_dup 1) (const_int 8) (const_int 8))) + (parallel + [(set (match_dup 4) + (match_op_dup 5 + [(any_logic:QI + (subreg:QI + (match_op_dup 3 + [(match_dup 0) (const_int 8) (const_int 8)]) 0) + (match_dup 2)) + (const_int 0)])) + (set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (subreg:SWI248 + (any_logic:QI + (subreg:QI + (match_op_dup 3 + [(match_dup 1) (const_int 8) (const_int 8)]) 0) + (match_dup 2)) 0))])] + "" [(set_attr "addr" "gpr8") (set_attr "type" "alu") (set_attr "mode" "QI")]) -(define_insn "*andqi_ext_2" +;; Alternative 1 is needed to work around LRA limitation, see PR82524. +(define_insn_and_split "*qi_ext_2" [(set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") + (match_operand 0 "int248_register_operand" "+Q,&Q") (const_int 8) (const_int 8)) (subreg:SWI248 - (and:QI + (any_logic:QI (subreg:QI (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "%0") + [(match_operand 1 "int248_register_operand" "%0,!Q") (const_int 8) (const_int 8)]) 0) (subreg:QI (match_operator:SWI248 4 "extract_operator" - [(match_operand 2 "int248_register_operand" "Q") + [(match_operand 2 "int248_register_operand" "Q,Q") (const_int 8) (const_int 8)]) 0)) 0)) (clobber (reg:CC FLAGS_REG))] - "/* FIXME: without this LRA can't reload this pattern, see PR82524. 
*/ - rtx_equal_p (operands[0], operands[1]) - || rtx_equal_p (operands[0], operands[2])" - "and{b}\t{%h2, %h0|%h0, %h2}" + "" + "@ + {b}\t{%h2, %h0|%h0, %h2} + #" + "reload_completed + && !(rtx_equal_p (operands[0], operands[1]) + || rtx_equal_p (operands[0], operands[2]))" + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (zero_extract:SWI248 + (match_dup 1) (const_int 8) (const_int 8))) + (parallel + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (subreg:SWI248 + (any_logic:QI + (subreg:QI + (match_op_dup 3 + [(match_dup 0) (const_int 8) (const_int 8)]) 0) + (subreg:QI + (match_op_dup 4 + [(match_dup 2) (const_int 8) (const_int 8)]) 0)) 0)) + (clobber (reg:CC FLAGS_REG))])] + "" [(set_attr "type" "alu") (set_attr "mode" "QI")]) -;; *andqi_ext_3 is defined via *qi_ext_3 below. +;; Alternative 1 is needed to work around LRA limitation, see PR82524. +(define_insn_and_split "*qi_ext_3" + [(set (zero_extract:SWI248 + (match_operand 0 "int248_register_operand" "+Q,&Q") + (const_int 8) + (const_int 8)) + (match_operator:SWI248 3 "extract_operator" + [(any_logic + (match_operand 1 "int248_register_operand" "%0,!Q") + (match_operand 2 "int248_register_operand" "Q,Q")) + (const_int 8) + (const_int 8)])) + (clobber (reg:CC FLAGS_REG))] + "GET_MODE (operands[1]) == GET_MODE (operands[2])" + "@ + {b}\t{%h2, %h0|%h0, %h2} + #" + "&& reload_completed + && !(rtx_equal_p (operands[0], operands[1]) + || rtx_equal_p (operands[0], operands[2]))" + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (zero_extract:SWI248 + (match_dup 1) (const_int 8) (const_int 8))) + (parallel + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (match_op_dup 3 + [(any_logic (match_dup 4) (match_dup 2)) + (const_int 8) (const_int 8)])) + (clobber (reg:CC FLAGS_REG))])] + "operands[4] = gen_lowpart (GET_MODE (operands[1]), operands[0]);" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) ;; Convert 
wide AND instructions with immediate operand to shorter QImode ;; equivalents when possible. @@ -12166,26 +12322,6 @@ (symbol_ref "!TARGET_PARTIAL_REG_STALL")] (symbol_ref "true")))]) -;; Alternative 1 is needed to work around LRA limitation, see PR82524. -(define_insn_and_split "*_1_slp" - [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+,&")) - (any_or:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "%0,!") - (match_operand:SWI12 2 "general_operand" "mn,mn"))) - (clobber (reg:CC FLAGS_REG))] - "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" - "@ - {}\t{%2, %0|%0, %2} - #" - "&& reload_completed" - [(set (strict_low_part (match_dup 0)) (match_dup 1)) - (parallel - [(set (strict_low_part (match_dup 0)) - (any_or:SWI12 (match_dup 0) (match_dup 2))) - (clobber (reg:CC FLAGS_REG))])] - "" - [(set_attr "type" "alu") - (set_attr "mode" "")]) - ;; convert (sign_extend:WIDE (any_logic:NARROW (memory, immediate))) ;; to (any_logic:WIDE (sign_extend (memory)), (sign_extend (immediate))). ;; This eliminates sign extension after logic operation. 
@@ -12276,90 +12412,6 @@ [(set_attr "type" "alu") (set_attr "mode" "")]) -(define_insn "*qi_ext_0" - [(set (match_operand:QI 0 "nonimmediate_operand" "=QBn") - (any_or:QI - (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 2 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)]) 0) - (match_operand:QI 1 "nonimmediate_operand" "0"))) - (clobber (reg:CC FLAGS_REG))] - "" - "{b}\t{%h2, %0|%0, %h2}" - [(set_attr "addr" "gpr8") - (set_attr "type" "alu") - (set_attr "mode" "QI")]) - -(define_insn "*qi_ext_1" - [(set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") - (const_int 8) - (const_int 8)) - (subreg:SWI248 - (any_or:QI - (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "0") - (const_int 8) - (const_int 8)]) 0) - (match_operand:QI 2 "general_operand" "QnBn")) 0)) - (clobber (reg:CC FLAGS_REG))] - "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) - /* FIXME: without this LRA can't reload this pattern, see PR82524. */ - && rtx_equal_p (operands[0], operands[1])" - "{b}\t{%2, %h0|%h0, %2}" - [(set_attr "addr" "gpr8") - (set_attr "type" "alu") - (set_attr "mode" "QI")]) - -(define_insn "*qi_ext_2" - [(set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") - (const_int 8) - (const_int 8)) - (subreg:SWI248 - (any_or:QI - (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "%0") - (const_int 8) - (const_int 8)]) 0) - (subreg:QI - (match_operator:SWI248 4 "extract_operator" - [(match_operand 2 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)]) 0)) 0)) - (clobber (reg:CC FLAGS_REG))] - "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) - /* FIXME: without this LRA can't reload this pattern, see PR82524. 
*/ - && (rtx_equal_p (operands[0], operands[1]) - || rtx_equal_p (operands[0], operands[2]))" - "{b}\t{%h2, %h0|%h0, %h2}" - [(set_attr "type" "alu") - (set_attr "mode" "QI")]) - -(define_insn "*qi_ext_3" - [(set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") - (const_int 8) - (const_int 8)) - (zero_extract:SWI248 - (any_logic:SWI248 - (match_operand 1 "int248_register_operand" "%0") - (match_operand 2 "int248_register_operand" "Q")) - (const_int 8) - (const_int 8))) - (clobber (reg:CC FLAGS_REG))] - "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) - /* FIXME: without this LRA can't reload this pattern, see PR82524. */ - && (rtx_equal_p (operands[0], operands[1]) - || rtx_equal_p (operands[0], operands[2]))" - "{b}\t{%h2, %h0|%h0, %h2}" - [(set_attr "type" "alu") - (set_attr "mode" "QI")]) - ;; Convert wide OR instructions with immediate operand to shorter QImode ;; equivalents when possible. ;; Don't do the splitting with memory operands, since it introduces risk @@ -12443,37 +12495,6 @@ (const_int 8)) 0) (match_dup 2)) 0))])]) -(define_insn "*xorqi_ext_1_cc" - [(set (reg FLAGS_REG) - (compare - (xor:QI - (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "0") - (const_int 8) - (const_int 8)]) 0) - (match_operand:QI 2 "general_operand" "QnBn")) - (const_int 0))) - (set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") - (const_int 8) - (const_int 8)) - (subreg:SWI248 - (xor:QI - (subreg:QI - (match_op_dup 3 - [(match_dup 1) - (const_int 8) - (const_int 8)]) 0) - (match_dup 2)) 0))] - "ix86_match_ccmode (insn, CCNOmode) - /* FIXME: without this LRA can't reload this pattern, see PR82524. */ - && rtx_equal_p (operands[0], operands[1])" - "xor{b}\t{%2, %h0|%h0, %2}" - [(set_attr "addr" "gpr8") - (set_attr "type" "alu") - (set_attr "mode" "QI")]) - ;; Peephole2 rega = 0; rega op= regb into rega = regb. 
(define_peephole2 [(parallel [(set (match_operand:SWI 0 "general_reg_operand") @@ -12813,7 +12834,8 @@ "@ neg{}\t%0 #" - "&& reload_completed" + "&& reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" [(set (strict_low_part (match_dup 0)) (match_dup 1)) (parallel [(set (strict_low_part (match_dup 0)) @@ -12881,22 +12903,40 @@ (set (match_operand:SWI48 0 "register_operand") (neg:SWI48 (match_dup 1)))])]) -(define_insn "*negqi_ext_2" +;; Alternative 1 is needed to work around LRA limitation, see PR82524. +(define_insn_and_split "*negqi_ext_1" [(set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") + (match_operand 0 "int248_register_operand" "+Q,&Q") (const_int 8) (const_int 8)) (subreg:SWI248 (neg:QI (subreg:QI (match_operator:SWI248 2 "extract_operator" - [(match_operand 1 "int248_register_operand" "0") + [(match_operand 1 "int248_register_operand" "0,!Q") (const_int 8) (const_int 8)]) 0)) 0)) (clobber (reg:CC FLAGS_REG))] - "/* FIXME: without this LRA can't reload this pattern, see PR82524. 
*/ - rtx_equal_p (operands[0], operands[1])" - "neg{b}\t%h0" + "" + "@ + neg{b}\t%h0 + #" + "reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (zero_extract:SWI248 + (match_dup 1) (const_int 8) (const_int 8))) + (parallel + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (subreg:SWI248 + (neg:QI + (subreg:QI + (match_op_dup 2 + [(match_dup 0) (const_int 8) (const_int 8)]) 0)) 0)) + (clobber (reg:CC FLAGS_REG))])] + "" [(set_attr "type" "negnot") (set_attr "mode" "QI")]) @@ -13420,7 +13460,8 @@ "@ not{}\t%0 #" - "&& reload_completed" + "&& reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" [(set (strict_low_part (match_dup 0)) (match_dup 1)) (set (strict_low_part (match_dup 0)) (not:SWI12 (match_dup 0)))] @@ -13479,6 +13520,40 @@ (const_int 0)])) (set (match_dup 1) (zero_extend:DI (xor:SI (match_dup 3) (const_int -1))))])]) + +;; Alternative 1 is needed to work around LRA limitation, see PR82524. 
+(define_insn_and_split "*one_cmplqi_ext_1" + [(set (zero_extract:SWI248 + (match_operand 0 "int248_register_operand" "+Q,&Q") + (const_int 8) + (const_int 8)) + (subreg:SWI248 + (not:QI + (subreg:QI + (match_operator:SWI248 2 "extract_operator" + [(match_operand 1 "int248_register_operand" "0,!Q") + (const_int 8) + (const_int 8)]) 0)) 0))] + "" + "@ + not{b}\t%h0 + #" + "reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (zero_extract:SWI248 + (match_dup 1) (const_int 8) (const_int 8))) + (set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (subreg:SWI248 + (not:QI + (subreg:QI + (match_op_dup 2 + [(match_dup 0) (const_int 8) (const_int 8)]) 0)) 0))] + "" + [(set_attr "type" "negnot") + (set_attr "mode" "QI")]) ;; Shift instructions @@ -14254,7 +14329,8 @@ return "sal{}\t{%2, %0|%0, %2}"; } } - "&& reload_completed" + "&& reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" [(set (strict_low_part (match_dup 0)) (match_dup 1)) (parallel [(set (strict_low_part (match_dup 0)) @@ -14458,23 +14534,26 @@ (const_string "*"))) (set_attr "mode" "")]) -(define_insn "*ashlqi_ext_2" +;; Alternative 1 is needed to work around LRA limitation, see PR82524. +(define_insn_and_split "*ashlqi_ext_1" [(set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") + (match_operand 0 "int248_register_operand" "+Q,&Q") (const_int 8) (const_int 8)) (subreg:SWI248 (ashift:QI (subreg:QI (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "0") + [(match_operand 1 "int248_register_operand" "0,!Q") (const_int 8) (const_int 8)]) 0) - (match_operand:QI 2 "nonmemory_operand" "cI")) 0)) - (clobber (reg:CC FLAGS_REG))] - "/* FIXME: without this LRA can't reload this pattern, see PR82524. 
*/ - rtx_equal_p (operands[0], operands[1])" + (match_operand:QI 2 "nonmemory_operand" "cI,cI")) 0)) + (clobber (reg:CC FLAGS_REG))] + "" { + if (which_alternative) + return "#"; + switch (get_attr_type (insn)) { case TYPE_ALU: @@ -14489,6 +14568,22 @@ return "sal{b}\t{%2, %h0|%h0, %2}"; } } + "reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (match_dup 1)) + (parallel + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (subreg:SWI248 + (ashift:QI + (subreg:QI + (match_op_dup 3 + [(match_dup 0) (const_int 8) (const_int 8)]) 0) + (match_dup 2)) 0)) + (clobber (reg:CC FLAGS_REG))])] + "" [(set (attr "type") (cond [(and (match_test "TARGET_DOUBLE_WITH_ADD") (match_operand 2 "const1_operand")) @@ -15247,7 +15342,8 @@ else return "{}\t{%2, %0|%0, %2}"; } - "&& reload_completed" + "&& reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" [(set (strict_low_part (match_dup 0)) (match_dup 1)) (parallel [(set (strict_low_part (match_dup 0)) @@ -15361,29 +15457,48 @@ (const_string "*"))) (set_attr "mode" "")]) -(define_insn "*qi_ext_2" +;; Alternative 1 is needed to work around LRA limitation, see PR82524. +(define_insn_and_split "*qi_ext_1" [(set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") + (match_operand 0 "int248_register_operand" "+Q,&Q") (const_int 8) (const_int 8)) (subreg:SWI248 (any_shiftrt:QI (subreg:QI (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "0") + [(match_operand 1 "int248_register_operand" "0,!Q") (const_int 8) (const_int 8)]) 0) - (match_operand:QI 2 "nonmemory_operand" "cI")) 0)) - (clobber (reg:CC FLAGS_REG))] - "/* FIXME: without this LRA can't reload this pattern, see PR82524. 
*/ - rtx_equal_p (operands[0], operands[1])" + (match_operand:QI 2 "nonmemory_operand" "cI,cI")) 0)) + (clobber (reg:CC FLAGS_REG))] + "" { + if (which_alternative) + return "#"; + if (operands[2] == const1_rtx && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) return "{b}\t%h0"; else return "{b}\t{%2, %h0|%h0, %2}"; } + "reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (match_dup 1)) + (parallel + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (subreg:SWI248 + (any_shiftrt:QI + (subreg:QI + (match_op_dup 3 + [(match_dup 0) (const_int 8) (const_int 8)]) 0) + (match_dup 2)) 0)) + (clobber (reg:CC FLAGS_REG))])] + "" [(set_attr "type" "ishift") (set (attr "length_immediate") (if_then_else @@ -15875,7 +15990,8 @@ else return "{}\t{%2, %0|%0, %2}"; } - "&& reload_completed" + "&& reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" [(set (strict_low_part (match_dup 0)) (match_dup 1)) (parallel [(set (strict_low_part (match_dup 0)) diff --git a/gcc/testsuite/gcc.target/i386/pr78904-1.c b/gcc/testsuite/gcc.target/i386/pr78904-1.c index d27d7fd651d0..ed5403f80672 100644 --- a/gcc/testsuite/gcc.target/i386/pr78904-1.c +++ b/gcc/testsuite/gcc.target/i386/pr78904-1.c @@ -46,3 +46,12 @@ struct S1 test_add (struct S1 a, struct S1 b) } /* { dg-final { scan-assembler "\[ \t\]addb" } } */ + +struct S1 test_sub (struct S1 a, struct S1 b) +{ + a.val -= b.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]subb" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr78904-1a.c b/gcc/testsuite/gcc.target/i386/pr78904-1a.c index 7746477d745b..aa9273eeb646 100644 --- a/gcc/testsuite/gcc.target/i386/pr78904-1a.c +++ b/gcc/testsuite/gcc.target/i386/pr78904-1a.c @@ -45,3 +45,12 @@ struct S1 test_add (struct S1 a, struct S1 b) } /* { dg-final { scan-assembler "\[ \t\]addb" } } */ + +struct S1 test_sub (struct S1 a, struct S1 b) +{ + a.val -= 
b.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]subb" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr78904-1b.c b/gcc/testsuite/gcc.target/i386/pr78904-1b.c index 20b677252ab5..0687c95e9122 100644 --- a/gcc/testsuite/gcc.target/i386/pr78904-1b.c +++ b/gcc/testsuite/gcc.target/i386/pr78904-1b.c @@ -47,3 +47,12 @@ struct S1 test_add (struct S1 a, struct S1 b) } /* { dg-final { scan-assembler "\[ \t\]addb" } } */ + +struct S1 test_sub (struct S1 a, struct S1 b) +{ + a.val -= b.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]subb" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr78904-2.c b/gcc/testsuite/gcc.target/i386/pr78904-2.c index 0cc4aaa91ea8..3e9389ec20d0 100644 --- a/gcc/testsuite/gcc.target/i386/pr78904-2.c +++ b/gcc/testsuite/gcc.target/i386/pr78904-2.c @@ -47,3 +47,12 @@ struct S1 test_add (struct S1 a) } /* { dg-final { scan-assembler "\[ \t\]addb\[ \t\]+t\[^\n\r]*, %.h" } } */ + +struct S1 test_sub (struct S1 a) +{ + a.val -= t.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]subb\[ \t\]+t\[^\n\r]*, %.h" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr78904-2a.c b/gcc/testsuite/gcc.target/i386/pr78904-2a.c index 41eaa2591581..f0c5979c8217 100644 --- a/gcc/testsuite/gcc.target/i386/pr78904-2a.c +++ b/gcc/testsuite/gcc.target/i386/pr78904-2a.c @@ -46,3 +46,12 @@ struct S1 test_add (struct S1 a) } /* { dg-final { scan-assembler "\[ \t\]addb\[ \t\]+t\[^\n\r]*, %.h" } } */ + +struct S1 test_sub (struct S1 a) +{ + a.val -= t.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]subb\[ \t\]+t\[^\n\r]*, %.h" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr78904-2b.c b/gcc/testsuite/gcc.target/i386/pr78904-2b.c index 23e975ac93ef..e6154e6d918a 100644 --- a/gcc/testsuite/gcc.target/i386/pr78904-2b.c +++ b/gcc/testsuite/gcc.target/i386/pr78904-2b.c @@ -48,3 +48,12 @@ struct S1 test_add (struct S1 a) } /* { dg-final { scan-assembler "\[ \t\]addb\[ \t\]+t\[^\n\r]*, %.h" } } */ + +struct S1 
test_sub (struct S1 a) +{ + a.val -= t.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]subb\[ \t\]+t\[^\n\r]*, %.h" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr78952-4.c b/gcc/testsuite/gcc.target/i386/pr78952-4.c index c7bd63c95438..d99796729459 100644 --- a/gcc/testsuite/gcc.target/i386/pr78952-4.c +++ b/gcc/testsuite/gcc.target/i386/pr78952-4.c @@ -46,3 +46,12 @@ struct S1 test_add (struct S1 a, struct S1 b) } /* { dg-final { scan-assembler "\[ \t\]addb" } } */ + +struct S1 test_sub (struct S1 a, struct S1 b) +{ + a.val -= b.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]subb" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr82524-1.c b/gcc/testsuite/gcc.target/i386/pr82524-1.c new file mode 100644 index 000000000000..6539630900a7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82524-1.c @@ -0,0 +1,63 @@ +/* PR target/82524 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -masm=att" } */ +/* { dg-additional-options "-mregparm=3" { target ia32 } } */ +/* { dg-final { scan-assembler-not "mov\[sz\]bl" } } */ + +struct S +{ + char pad1; + char val; + char pad2; + char pad3; +}; + +struct S +test_plus (struct S a, struct S b, struct S c) +{ + a.val = b.val + c.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]addb" } } */ + +struct S +test_minus (struct S a, struct S b, struct S c) +{ + a.val = b.val - c.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]subb" } } */ + +struct S +test_and (struct S a, struct S b, struct S c) +{ + a.val = b.val & c.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]andb" } } */ + +struct S +test_or (struct S a, struct S b, struct S c) +{ + a.val = b.val | c.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]orb" } } */ + +struct S +test_xor (struct S a, struct S b, struct S c) +{ + a.val = b.val ^ c.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]xorb" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr82524-2.c 
b/gcc/testsuite/gcc.target/i386/pr82524-2.c new file mode 100644 index 000000000000..766dd1aae1e3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82524-2.c @@ -0,0 +1,63 @@ +/* PR target/82524 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -masm=att" } */ +/* { dg-additional-options "-mregparm=3" { target ia32 } } */ +/* { dg-final { scan-assembler-not "mov\[sz\]bl" } } */ + +struct S +{ + char pad1; + char val; + char pad2; + char pad3; +}; + +struct S +test_plus (struct S a, struct S b, char *c) +{ + a.val = b.val + *c; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]addb" } } */ + +struct S +test_minus (struct S a, struct S b, char *c) +{ + a.val = b.val - *c; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]subb" } } */ + +struct S +test_and (struct S a, struct S b, char *c) +{ + a.val = b.val & *c; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]andb" } } */ + +struct S +test_or (struct S a, struct S b, char *c) +{ + a.val = b.val | *c; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]orb" } } */ + +struct S +test_xor (struct S a, struct S b, char *c) +{ + a.val = b.val ^ *c; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]xorb" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr82524-3.c b/gcc/testsuite/gcc.target/i386/pr82524-3.c new file mode 100644 index 000000000000..7a66712193e1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82524-3.c @@ -0,0 +1,42 @@ +/* PR target/82524 */ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -masm=att" } */ +/* { dg-final { scan-assembler-not "movzbl" } } */ + +struct S +{ + unsigned char pad1; + unsigned char val; + unsigned short pad2; + unsigned int pad3; +}; + +struct S +test_and (struct S a, struct S b, struct S c) +{ + a.val = b.val & c.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]andb" } } */ + +struct S +test_or (struct S a, struct S b, struct S c) +{ + a.val = b.val | c.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]orb" } } */ + +struct S +test_xor (struct S a, struct S b, struct S c) +{ + a.val = b.val ^ c.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]xorb" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr82524.c b/gcc/testsuite/gcc.target/i386/pr82524.c new file mode 100644 index 000000000000..058f0a2d14df --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82524.c @@ -0,0 +1,83 @@ +/* PR target/82524 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -masm=att" } */ +/* { dg-additional-options "-mregparm=3" { target ia32 } } */ +/* { dg-final { scan-assembler-not "mov\[sz\]bl" } } */ + +struct S +{ + char pad1; + char val; + char pad2; + char pad3; +}; + +struct S +test_plus (struct S a, struct S b, char c) +{ + a.val = b.val + c; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]addb" } } */ + +struct S +test_minus (struct S a, struct S b, char c) +{ + a.val = b.val - c; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]subb" } } */ + +struct S +test_neg (struct S a, struct S b) +{ + a.val = -b.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]negb" } } */ + +struct S +test_and (struct S a, struct S b, char c) +{ + a.val = b.val & c; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]andb" } } */ + +struct S +test_or (struct S a, struct S b, char c) +{ + a.val = b.val | c; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]orb" } } */ + +struct S +test_xor (struct S a, struct S b, char c) +{ + a.val = 
b.val ^ c; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]xorb" } } */ + +struct S +test_not (struct S a, struct S b) +{ + a.val = ~b.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]notb" } } */