Modifying ARM code generator for elimination of 8bit writes - need help

Rask Ingemann Lambertsen rask@sygehus.dk
Mon Jun 5 14:17:00 GMT 2006


On Mon, Jun 05, 2006 at 01:47:10PM +0200, Wolfgang Mües wrote:

> I don't know why the form "[%r, #0]" was coded before, because the 
> assembler understands "[%r]" very well for all instructions. The form 
> "[%r]" has a wider usage because it covers swp too.

Does GCC happen to accept "[%r, #0]" for swp?

> Also, I wonder what the "Q" constraint really means:
> 
> from the GCC manual:
> 
> > Q
> > A memory reference where the exact address is in a single register
> > (``m'' is preferable for asm statements)
> 
> but in arm.h:
> 
> > /* For the ARM, `Q' means that this is a memory operand that is just
> >    an offset from a register.
> > #define EXTRA_CONSTRAINT_STR_ARM(OP, C, STR)			\
> >    ((C) == 'Q') ? (GET_CODE (OP) == MEM					\
> > 		 && GET_CODE (XEXP (OP, 0)) == REG) :			\

I think the comment in arm.h is wrong. The manual seems to agree with the
code.

> Obviously, GCC tries to implement REG+CONSTANT with Q.
> 
> Maybe I must define a new constraint?

I tried 'V' instead, but it looks as if reload completely ignores the
meaning of the constraint. There is already a comment in arm.md about that.
It should be investigated further.

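Meanwhile, I changed arm_legitimate_address_p() to enforce the correct
address form. This hurts byte loads too, though: they also lose the
register+offset addressing mode (note the extra add instructions before
the ldrb instructions in the -mswp-byte-writes listing below).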

Index: gcc/config/arm/arm.c
===================================================================
--- gcc/config/arm/arm.c	(revision 114119)
+++ gcc/config/arm/arm.c	(working copy)
@@ -3509,6 +3509,9 @@
   if (arm_address_register_rtx_p (x, strict_p))
     return 1;
 
+  if (TARGET_ARM && TARGET_SWP_BYTE_WRITES && mode == QImode && outer == SET)
+    return 0;
+
   use_ldrd = (TARGET_LDRD
 	      && (mode == DImode
 		  || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));

Index: gcc/config/arm/arm.opt
===================================================================
--- gcc/config/arm/arm.opt	(revision 114119)
+++ gcc/config/arm/arm.opt	(working copy)
@@ -153,3 +153,7 @@
 mwords-little-endian
 Target Report RejectNegative Mask(LITTLE_WORDS)
 Assume big endian bytes, little endian words
+
+mswp-byte-writes
+Target Report Mask(SWP_BYTE_WRITES)
+Use the swp instruction for byte writes

Index: gcc/config/arm/arm.md
===================================================================
--- gcc/config/arm/arm.md	(revision 114119)
+++ gcc/config/arm/arm.md	(working copy)
@@ -5158,7 +5158,7 @@
 (define_insn "*arm_movqi_insn"
   [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,m")
 	(match_operand:QI 1 "general_operand" "rI,K,m,r"))]
-  "TARGET_ARM
+  "TARGET_ARM && !TARGET_SWP_BYTE_WRITES
    && (   register_operand (operands[0], QImode)
        || register_operand (operands[1], QImode))"
   "@
@@ -5170,6 +5170,44 @@
    (set_attr "predicable" "yes")]
 )
 
+; This is for the Nintendo DS external RAM.
+(define_insn "*arm_movqi_insn_swp"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,Q")
+	(match_operand:QI 1 "general_operand" "rI,K,m,r"))]
+  "TARGET_ARM && TARGET_SWP_BYTE_WRITES
+   && (   register_operand (operands[0], QImode)
+       || register_operand (operands[1], QImode))"
+  "@
+   mov%?\\t%0, %1
+   mvn%?\\t%0, #%B1
+   ldr%?b\\t%0, %1
+   swp%?b\\t%1, %1, %0\;ldr%?b\\t%1, %0"
+  [(set_attr "type" "*,*,load1,store1")
+   (set_attr "predicable" "yes")]
+)
+
+(define_insn "*arm_movqi_insn_swp_clobber"
+  [(set (match_operand:QI 0 "memory_operand" "=Q")
+        (match_operand:QI 1 "register_operand" "r"))
+   (clobber (match_operand:QI 2 "register_operand" "=r"))]
+  "TARGET_ARM && TARGET_SWP_BYTE_WRITES"
+  "swp%?b\\t%2, %1, %0"
+  [(set_attr "type" "store1")
+   (set_attr "predicable" "yes")]
+)
+
+; Avoid reading the stored value back if we have a spare register.
+(define_peephole2
+  [(match_scratch:QI 2 "r")
+   (set (match_operand:QI 0 "memory_operand" "")
+        (match_operand:QI 1 "register_operand" ""))]
+  "TARGET_ARM && TARGET_SWP_BYTE_WRITES"
+  [(parallel [
+    (set (match_dup 0) (match_dup 1))
+    (clobber (match_dup 2))]
+  )]
+)
+
 (define_insn "*thumb_movqi_insn"
   [(set (match_operand:QI 0 "nonimmediate_operand" "=l,l,m,*r,*h,l")
 	(match_operand:QI 1 "general_operand"      "l, m,l,*h,*r,I"))]

This seems to work as intended on a small test case:

/* Test structure: two char members placed next to each other between two
   int members.  In the listings below, the byte accesses are at offsets
   4 and 5 from the struct pointer and the word accesses at offsets 0 and 8.  */
struct foobar
{
	int  i1;
	char c1;
	char c2;
	int  i2;
};

/* Small test case mixing word loads/stores, byte loads and a single byte
   store, so the code generated for the byte store can be compared with and
   without -mswp-byte-writes (see the two listings below).  */
void bytewritetest (struct foobar *x)
{
	/* Word load followed by word store.  */
	x->i2 = x->i1;
	/* Two byte loads; the sum is stored as a word.  */
	x->i1 = x->c1 + x->c2;
	/* The only byte store in the function: c2 is updated in place.  */
	x->c2 ^= x->c1;
}

With just -O2:

bytewritetest:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 0, uses_anonymous_args = 0
	@ link register save eliminated.
	ldrb	r3, [r0, #5]	@ zero_extendqisi2
	ldrb	r2, [r0, #4]	@ zero_extendqisi2
	ldr	ip, [r0, #0]
	eor	r1, r3, r2
	add	r3, r3, r2
	@ lr needed for prologue
	strb	r1, [r0, #5]
	str	ip, [r0, #8]
	str	r3, [r0, #0]
	bx	lr

With -O2 -mswp-byte-writes:

bytewritetest:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 0, uses_anonymous_args = 0
	str	lr, [sp, #-4]!
	add	r2, r0, #4
	add	lr, r0, #5
	ldrb	r3, [lr, #0]	@ zero_extendqisi2
	ldrb	r1, [r2, #0]	@ zero_extendqisi2
	eor	r2, r1, r3
	add	r3, r3, r1
	ldr	ip, [r0, #0]
	str	r3, [r0, #0]
	swpb	r3, r2, [lr, #0]
	str	ip, [r0, #8]
	ldr	pc, [sp], #4


The register allocator chooses to use the lr register, which in turn causes
link register save elimination to fail, and that doesn't help.

-- 
Rask Ingemann Lambertsen



More information about the Gcc-patches mailing list