This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: Modifying ARM code generator for elimination of 8bit writes - need help


On Fri, Jun 02, 2006 at 09:24:17AM +0200, Rask Ingemann Lambertsen wrote:

> The rest of the ARM backend presently assumes that the pattern has the form
> 
> (set (operand:QI 0) (operand:QI 1))
> 
> but now we've changed it to
> 
> (parallel [(set (operand:QI 0) (operand:QI 1))
> 	   (clobber (operand:QI 2))
> ])
> 
> so that's why you get "unrecognizable insn" errors now. Any place which
> intended to generate an *arm_movqi_insn has to add a clobber also. For a
> start, this means the "movqi" pattern.

I've now implemented it. This brings a small improvement to the code
generated for bytewritetest:

bytewritetest:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 0, uses_anonymous_args = 0
	@ link register save eliminated.
	ldrb	r3, [r0, #5]	@ zero_extendqisi2
	ldrb	ip, [r0, #4]	@ zero_extendqisi2
	ldr	r2, [r0, #0]
	add	r1, r3, ip
	str	r2, [r0, #8]
	str	r1, [r0], #5	<--
	eor	r3, r3, ip
	swpb	r2, r3, [r0]
	@ lr needed for prologue
	bx	lr

Exactly the same number of instructions as without -mswp-byte-writes because
of postincrement. Basically, it pays off to get the insn expanded correctly to
begin with, rather than leaving it to reload to fix it up later. This should
work fine with volatile variables because there is no need to read back from
memory. The peephole optimizations are gone for the same reason. I do wonder
if the ability to reuse the input register as a scratch register has been
preserved, though.

Compiling unwind-dw2-fde.c, I noticed that the code produced for
__register_frame_info_table_bases() differs more than expected:

__register_frame_info_table_bases:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 0, uses_anonymous_args = 0
 1	stmfd	sp!, {r4, lr}
 2	mov	lr, #0
 3	str	lr, [r1, #16]
 4	ldrb	ip, [r1, #16]	@ zero_extendqisi2
 5	orr	ip, ip, #2
 6	strb	ip, [r1, #16]
 7	ldr	r4, .L28
 8	ldrh	ip, [r1, #16]
 9	ldr	lr, [r4, #0]
10	orr	ip, ip, #2032
11	str	r0, [r1, #12]
12	orr	ip, ip, #8
13	mvn	r0, #0
14	strh	ip, [r1, #16]	@ movhi
15	str	lr, [r1, #20]
16	str	r0, [r1, #0]
17	str	r1, [r4, #0]
18	stmib	r1, {r2, r3}	@ phole stm
19	ldmfd	sp!, {r4, pc}

vs.

__register_frame_info_table_bases:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 0, uses_anonymous_args = 0
 2	mov	ip, #0
 3	str	ip, [r1, #16]
 1	str	lr, [sp, #-4]!
 4	ldrb	lr, [r1, #16]	@ zero_extendqisi2
11	str	r0, [r1, #12]
 5	orr	lr, lr, #2
13	mvn	r0, #0
 6a	add	ip, r1, #16
16+18?	stmia	r1, {r0, r2, r3}	@ phole stm
 6b	swpb	r3, lr, [ip]
 7	ldr	r0, .L28
 8	ldrh	r3, [r1, #16]
 9	ldr	r2, [r0, #0]
10	orr	r3, r3, #2032
12	orr	r3, r3, #8
14	strh	r3, [r1, #16]	@ movhi
15	str	r2, [r1, #20]
17	str	r1, [r0, #0]
19	ldr	pc, [sp], #4

But the swp version seems to be equivalent, doesn't it?

I'm not sure that the reload_outqi expander will correctly handle
cases where reload spills a register to memory. If the memory address
doesn't have the right form, it becomes more complicated.

Index: gcc/config/arm/arm.h
===================================================================
--- gcc/config/arm/arm.h	(revision 114119)
+++ gcc/config/arm/arm.h	(working copy)
@@ -1094,6 +1094,8 @@
    ? vfp_secondary_reload_class (MODE, X)			\
    : TARGET_ARM							\
    ? (((MODE) == HImode && ! arm_arch4 && true_regnum (X) == -1) \
+   || ((MODE) == QImode && TARGET_ARM && TARGET_SWP_BYTE_WRITES	\
+       && true_regnum (X) == -1)				\
     ? GENERAL_REGS : NO_REGS)					\
    : THUMB_SECONDARY_OUTPUT_RELOAD_CLASS (CLASS, MODE, X))
 
Index: gcc/config/arm/arm.opt
===================================================================
--- gcc/config/arm/arm.opt	(revision 114119)
+++ gcc/config/arm/arm.opt	(working copy)
@@ -153,3 +153,7 @@
 mwords-little-endian
 Target Report RejectNegative Mask(LITTLE_WORDS)
 Assume big endian bytes, little endian words
+
+mswp-byte-writes
+Target Report Mask(SWP_BYTE_WRITES)
+Use the swp instruction for byte writes. The default is to use str
Index: gcc/config/arm/predicates.md
===================================================================
--- gcc/config/arm/predicates.md	(revision 114119)
+++ gcc/config/arm/predicates.md	(working copy)
@@ -125,6 +125,14 @@
 			 || (GET_CODE (op) == REG
 			     && REGNO (op) >= FIRST_PSEUDO_REGISTER)))")))
 
+;; Match register operands or memory operands of the form (mem (reg ...)),
+;; as permitted by the "Q" memory constraint.
+(define_predicate "reg_or_Qmem_operand"
+  (ior (match_operand 0 "register_operand")
+       (and (match_code "mem")
+	    (match_code "reg" "0")))
+)
+
 ;; True for valid operands for the rhs of an floating point insns.
 ;;   Allows regs or certain consts on FPA, just regs for everything else.
 (define_predicate "arm_float_rhs_operand"
Index: gcc/config/arm/arm.md
===================================================================
--- gcc/config/arm/arm.md	(revision 114119)
+++ gcc/config/arm/arm.md	(working copy)
@@ -5151,6 +5151,16 @@
       emit_insn (gen_movsi (operands[0], operands[1]));
       DONE;
     }
+  if (TARGET_ARM && TARGET_SWP_BYTE_WRITES)
+    {
+      /* Ensure that operands[0] is (mem (reg ...)) if a memory operand. */
+      if (MEM_P (operands[0]) && !REG_P (XEXP (operands[0], 0)))
+	    operands[0]
+	      = replace_equiv_address (operands[0],
+				       copy_to_reg (XEXP (operands[0], 0)));
+      emit_insn (gen__arm_movqi_insn_swp (operands[0], operands[1]));
+      DONE;
+    }
   "
 )
 
@@ -5158,7 +5168,7 @@
 (define_insn "*arm_movqi_insn"
   [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,m")
 	(match_operand:QI 1 "general_operand" "rI,K,m,r"))]
-  "TARGET_ARM
+  "TARGET_ARM && !TARGET_SWP_BYTE_WRITES
    && (   register_operand (operands[0], QImode)
        || register_operand (operands[1], QImode))"
   "@
@@ -5170,6 +5180,31 @@
    (set_attr "predicable" "yes")]
 )
 
+;; This is primarily a hack for the Nintendo DS external RAM.
+(define_insn "_arm_movqi_insn_swp"
+  [(set (match_operand:QI 0 "reg_or_Qmem_operand" "=r,r,r,Q")
+	(match_operand:QI 1 "general_operand" "rI,K,m,r"))
+        (clobber (match_scratch:QI 2 "=X,X,X,r"))]
+  "TARGET_ARM && TARGET_SWP_BYTE_WRITES
+   && (   register_operand (operands[0], QImode)
+       || register_operand (operands[1], QImode))"
+  "@
+   mov%?\\t%0, %1
+   mvn%?\\t%0, #%B1
+   ldr%?b\\t%0, %1
+   swp%?b\\t%2, %1, [%|%m0]"
+  [(set_attr "type" "*,*,load1,store1")
+   (set_attr "predicable" "yes")]
+)
+
+;; The earlyclobber is required by default_secondary_reload() in targhooks.c.
+(define_expand "reload_outqi"
+  [(set (match_operand:QI 0 "memory_operand" "=Q")
+	(match_operand:QI 1 "register_operand" "r"))
+   (clobber (match_operand:QI 2 "register_operand" "=&r"))]
+  "TARGET_ARM && TARGET_SWP_BYTE_WRITES"
+)
+
 (define_insn "*thumb_movqi_insn"
   [(set (match_operand:QI 0 "nonimmediate_operand" "=l,l,m,*r,*h,l")
 	(match_operand:QI 1 "general_operand"      "l, m,l,*h,*r,I"))]


-- 
Rask Ingemann Lambertsen


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]