This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH 2/2][ARC] Add support for ARCv2 CPUs


 * config/arc/gmon/profil.S (__profil, __profil_irq): Don't profile

Shouldn't profil return -1 on failure?

config/arc/lib1funcs.S (__udivmodsi4):

@@ -393,7 +393,12 @@ SYM(__udivmodsi4):
        lsr_s r1,r1
        cmp_s r0,r1
        xor.f r2,lp_count,31
+#if !defined (__EM__) && !defined (__HS__)
        mov_s lp_count,r2
+#else
+       mov lp_count,r2
+       nop_s
+#endif /* !__EM__ && !__HS__ */
 #endif /* !__ARC_NORM__ */
        sub.cc r0,r0,r1
        mov_s r3,3

This is in !__ARC_NORM__ code, so this should never happen for __HS__.
It can happen for __EM__ - and then we have another problem.  AFAIK
__EM__ also has the LP_COUNT interlock, which slows down every reference
to LP_COUNT by the time of one ordinary instruction.  So this ARC600 code
will run rather slowly on it.  OTOH, we can use the better handling of branch
instructions inside a zero-overhead loop to our advantage here, like this:

#else /* ! __ARC_NORM__ */
        lsr_s r2,r0
        brhs.d r1,r2,.Lret0_3

#ifdef __EM__
        mov lp_count,-1
        asl_s r1,r1             ; den <<= 1
        lp 1f
.Loop1:
        brhi r1,r2,1f
        asl.ls r1,r1
1:
        sub_s r0,r0,r1
        lsr_s r1,r1
        cmp r0,r1
        not r2,lp_count
        mov.f lp_count,r2
#else /*  !__EM__  */
        mov_s r3,0
.Lloop1:
        asl_s r1,r1             ; den <<= 1
        brls.d r1,r2,@.Lloop1
        sub_s r3,r3,1
        sub_s r0,r0,r1
        lsr_s r1,r1
        cmp_s r0,r1
        not_s r2,r3
        mov.f lp_count,r3
#endif /* !__EM__ */
#endif /* !__ARC_NORM__ */

Hmm, somehow I lost the explicit inner-loop lp_count reference anyway, but
the version with lp is still one instruction shorter inside the loop,
although the other version is shorter statically.
OTOH, maybe not.f / mov_s is faster when followed by sub.cc; that depends on
what the ARC600 pipeline is like today.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]