This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

Re: [PATCH 2/2][ARC] Add support for ARCv2 CPUs

From: Joern Wolfgang Rennecke <gnu at amylaar dot uk>
To: Claudiu Zissulescu <Claudiu dot Zissulescu at synopsys dot com>, "gcc-patches at gcc dot gnu dot org" <gcc-patches at gcc dot gnu dot org>
Cc: Francois Bedard <Francois dot Bedard at synopsys dot com>, "jeremy dot bennett at embecosm dot com" <jeremy dot bennett at embecosm dot com>
Date: Fri, 23 Oct 2015 22:10:01 +0100
Subject: Re: [PATCH 2/2][ARC] Add support for ARCv2 CPUs
Authentication-results: sourceware.org; auth=none
References: <098ECE41A0A6114BB2A07F1EC238DE896616641A at de02wembxa dot internal dot synopsys dot com>

 * config/arc/gmon/profil.S (__profil, __profil_irq): Don't profile

Shouldn't profil return -1 on failure?

config/arc/lib1funcs.S (__udivmodsi4):

@@ -393,7 +393,12 @@ SYM(__udivmodsi4):
        lsr_s r1,r1
        cmp_s r0,r1
        xor.f r2,lp_count,31
+#if !defined (__EM__) && !defined (__HS__)
        mov_s lp_count,r2
+#else
+       mov lp_count,r2
+       nop_s
+#endif /* !__EM__ && !__HS__ */
 #endif /* !__ARC_NORM__ */
        sub.cc r0,r0,r1
        mov_s r3,3

This is in ! __ARC_NORM__ code - so this should never happen for __HS__.
It can happen for __EM__ - and then we have another problem.  AFAIK
__EM__ also has the LP_COUNT interlock that slows down every reference
to LP_COUNT by an ordinary instruction.  So this ARC600 code will run
rather slowly on it.  OTOH, we can use the better handling of branch
instructions inside a zero overhead loop to our advantage here, like this:

#else /* ! __ARC_NORM__ */
        lsr_s r2,r0
        brhs.d r1,r2,.Lret0_3

#ifdef __EM__
        mov lp_count,-1
        asl_s r1,r1             ; den <<= 1
        lp 1f
.Loop1:
        brhi r1,r2,1f
        asl.ls r1,r1
1:
        sub_s r0,r0,r1
        lsr_s r1,r1
        cmp r0,r1
        not r2,lp_count
        mov.f lp_count,r2
#else /*  !__EM__  */
        mov_s r3,0
.Lloop1:
        asl_s r1,r1             ; den <<= 1
        brls.d r1,r2,@.Lloop1
        sub_s r3,r3,1
        sub_s r0,r0,r1
        lsr_s r1,r1
        cmp_s r0,r1
        not_s r2,r3
        mov.f lp_count,r3
#endif /* !__EM__ */
#endif /* !__ARC_NORM__

Hmm, somehow I lost the inner-loop explicit lp_count reference anyway, but
the version with lp is still one instruction shorter inside the loop,
although the other version is shorter statically.
OTOH, maybe the not.f / mov_s sequence is faster when followed by sub.cc;
that depends on what the ARC600 pipeline is like today.

Follow-Ups:
- RE: [PATCH 2/2][ARC] Add support for ARCv2 CPUs
  - From: Claudiu Zissulescu

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]