Handle divide overflow on x86_64

Mon Apr 10 15:51:00 GMT 2006

I never bothered to enable the divide overflow exception on x86_64
because I didn't think it was an important component of total
runtimes.  To my surprise, I discovered that under heavy application
server load, the divide subroutines account for 4% of total CPU time.

samples  %        image name               app name                 symbol name
176796    2.1502  libgcj.so.7.0.0          gij                      _Jv_remI
104605    1.2722  libgcj.so.7.0.0          gij                      _Jv_divI
57126     0.6948  libgcj.so.7.0.0          gij                      _Jv_remJ
392       0.0048  libgcj.so.7.0.0          gij                      _Jv_divJ

It seems that divide and remainder are used a great deal, particularly
for hash tables, so it's worth inlining them rather than using library
routines.

Andrew.



2006-04-10  Andrew Haley  <aph@redhat.com>

	* include/x86_64-signal.h (HANDLE_FPE, HANDLE_DIVIDE_OVERFLOW)
	(INIT_FPE): New.
	(SIGNAL_HANDLER): Mark arg as unused.
	* configure.host (x86_64-* DIVIDESPEC): Use
	-fno-use-divide-subroutine.

Index: configure.host
===================================================================

--- configure.host	(revision 112821)
+++ configure.host	(working copy)
@@ -115,7 +115,7 @@
 	libgcj_flags="${libgcj_flags} -fomit-frame-pointer"
 	libgcj_cxxflags=
 	libgcj_cflags=
-	DIVIDESPEC=-f%{m32:no-}use-divide-subroutine
+	DIVIDESPEC=-fno-use-divide-subroutine
 	enable_hash_synchronization_default=yes
 	slow_pthread_self=yes
 	libgcj_interpreter=yes
Index: include/x86_64-signal.h
===================================================================
--- include/x86_64-signal.h	(revision 112821)
+++ include/x86_64-signal.h	(working copy)
@@ -19,9 +19,88 @@
 #include <sys/syscall.h>
 
 #define HANDLE_SEGV 1
+#define HANDLE_FPE 1
 
-#define SIGNAL_HANDLER(_name)	\
-static void _Jv_##_name (int, siginfo_t *, void *_p)
+#define SIGNAL_HANDLER(_name)					\
+static void _Jv_##_name (int, siginfo_t *,			\
+			 void *_p __attribute__ ((__unused__)))
+
+#define HANDLE_DIVIDE_OVERFLOW						\
+do									\
+{									\
+  struct ucontext *_uc = (struct ucontext *)_p;				\
+  volatile struct sigcontext *_sc = (struct sigcontext *) &_uc->uc_mcontext; \
+									\
+  register unsigned char *_rip = (unsigned char *)_sc->rip;		\
+									\
+  /* According to the JVM spec, "if the dividend is the negative	\
+   * integer of largest possible magnitude for the type and the		\
+   * divisor is -1, then overflow occurs and the result is equal to	\
+   * the dividend.  Despite the overflow, no exception occurs".		\
+									\
+   * We handle this by inspecting the instruction which generated the	\
+   * signal and advancing ip to point to the following instruction.	\
+   * As the instructions are variable length it is necessary to do a	\
+   * little calculation to figure out where the following instruction	\
+   * actually is.							\
+									\
+   */									\
+									\
+  bool _is_64_bit = false;						\
+									\
+  if ((_rip[0] & 0xf0) == 0x40)  /* REX byte present.  */		\
+    {									\
+      unsigned char _rex = _rip[0] & 0x0f;				\
+      _is_64_bit = (_rex & 0x08) != 0;					\
+      _rip++;								\
+    }									\
+									\
+  /* Detect a signed division of Integer.MIN_VALUE or Long.MIN_VALUE.  */ \
+  if (_rip[0] == 0xf7)							\
+    {									\
+      bool _min_value_dividend = false;					\
+      unsigned char _modrm = _rip[1];					\
+									\
+      if (((_modrm >> 3) & 7) == 7)					\
+	{								\
+	  if (_is_64_bit)						\
+	    _min_value_dividend = (_sc->rax == 0x8000000000000000L);	\
+	  else								\
+	    _min_value_dividend = ((_sc->rax & 0xffffffff) == 0x80000000); \
+	}								\
+									\
+      if (_min_value_dividend)						\
+	{								\
+	  unsigned char _rm = _modrm & 7;				\
+	  _sc->rdx = 0; /* the remainder is zero */			\
+	  switch (_modrm >> 6)						\
+	    {								\
+	    case 0:  /* register indirect */				\
+	      if (_rm == 5)   /* 32-bit displacement */			\
+		_rip += 4;						\
+	      if (_rm == 4)  /* A SIB byte follows the ModR/M byte */	\
+		_rip += 1;						\
+	      break;							\
+	    case 1:  /* register indirect + 8-bit displacement */	\
+	      _rip += 1;						\
+	      if (_rm == 4)  /* A SIB byte follows the ModR/M byte */	\
+		_rip += 1;						\
+	      break;							\
+	    case 2:  /* register indirect + 32-bit displacement */	\
+	      _rip += 4;						\
+	      if (_rm == 4)  /* A SIB byte follows the ModR/M byte */	\
+		_rip += 1;						\
+	      break;							\
+	    case 3:							\
+	      break;							\
+	    }								\
+	  _rip += 2;							\
+	  _sc->rip = (unsigned long)_rip;				\
+	  return;							\
+	}								\
+    }									\
+}									\
+while (0)
 
 extern "C" 
 {
@@ -65,12 +144,29 @@
   }								\
 while (0)  
 
-/* We use syscall(SYS_rt_sigaction) in INIT_SEGV instead of
- * sigaction() because on some systems the pthreads wrappers for
- * signal handlers are not compiled with unwind information, so it's
- * not possible to unwind through them.  This is a problem that will
- * go away if all systems ever have pthreads libraries that are
- * compiled with unwind info.  */
+#define INIT_FPE						\
+do								\
+  {								\
+    struct kernel_sigaction act;				\
+    act.k_sa_sigaction = _Jv_catch_fpe;				\
+    sigemptyset (&act.k_sa_mask);				\
+    act.k_sa_flags = SA_SIGINFO|0x4000000;			\
+    act.k_sa_restorer = restore_rt;				\
+    syscall (SYS_rt_sigaction, SIGFPE, &act, NULL, _NSIG / 8);	\
+  }								\
+while (0)  
+
+/* You might wonder why we use syscall(SYS_rt_sigaction) in INIT_FPE
+ * instead of the standard sigaction().  This is necessary because of
+ * the shenanigans above where we increment the PC saved in the
+ * context and then return.  This trick will only work when we are
+ * called _directly_ by the kernel, because linuxthreads wraps signal
+ * handlers and its wrappers do not copy the sigcontext struct back
+ * when returning from a signal handler.  If we return from our divide
+ * handler to a linuxthreads wrapper, we will lose the PC adjustment
+ * we made and return to the faulting instruction again.  Using
+ * syscall(SYS_rt_sigaction) causes our handler to be called directly
+ * by the kernel, bypassing any wrappers.  */
 
 #endif /* JAVA_SIGNAL_H */