
[UPC 20/22] libgupc runtime library [2/9]


[NOTE: Due to email list size limits, this patch is broken into 9 parts.]

Background
----------

An overview email describing the UPC-related changes is here:
  https://gcc.gnu.org/ml/gcc-patches/2015-12/msg00005.html

The GUPC branch is described here:
  http://gcc.gnu.org/projects/gupc.html

The UPC-related source code differences are summarized here:
  http://gccupc.org/gupc-changes

All languages (c, c++, fortran, go, lto, objc, obj-c++) have been
bootstrapped; no test suite regressions were introduced
relative to the GCC trunk.

If you are on the cc-list, you were included either because
you are listed as a maintainer for an area touched by these
patches, or because you have been a frequent contributor of
patches to the files listed in this email.

The ChangeLog entries included in each patch list the directory
containing the affected files, followed by the files themselves.
When the patches are applied, the entries will be distributed
to the appropriate ChangeLog files.

Overview
--------

Libgupc is the UPC runtime library for GUPC.  The configuration,
makefile, and documentation-related changes have been broken out
into separate patches.

As noted in the ChangeLog entry below, this is all new code.
Two communication layers are supported: (1) SMP, via 'mmap',
or (2) the Portals4 library API, which supports multi-node
operation.  Libgupc generally requires a POSIX-compliant target OS.

The 'smp' runtime is the default.  The 'portals4' runtime is
experimental; it supports multi-node operation via the Portals4
communications library.
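
To make the 'SMP via mmap' layer more concrete, here is a minimal,
self-contained C sketch of the idea behind the runtime's shared
pointer decoding: a shared pointer names a (thread, offset) pair, and
the runtime translates it into an address within that thread's mapped
segment.  All names below (toy_shared_ptr, toy_sptr_to_addr, the
segment count and size) are hypothetical illustrations; the runtime's
actual representation is the upc_shared_ptr_t type and the GUPCR_PTS_*
macros used in upc_access.c below.

  /* Illustrative only: a toy model of decoding a "shared pointer"
     into a local address, loosely mirroring __upc_access_sptr_to_addr.
     The real runtime maps the per-thread segments with mmap.  */
  #include <stddef.h>
  #include <stdio.h>

  #define TOY_THREADS  4
  #define TOY_SEG_SIZE 4096

  static char toy_segments[TOY_THREADS][TOY_SEG_SIZE];

  typedef struct
  {
    unsigned thread;   /* thread whose segment holds the data */
    size_t offset;     /* offset within that segment */
  } toy_shared_ptr;

  static void *
  toy_sptr_to_addr (toy_shared_ptr p)
  {
    if (p.thread >= TOY_THREADS)
      return NULL;               /* the real code calls __upc_fatal */
    return &toy_segments[p.thread][p.offset];
  }

  int
  main (void)
  {
    toy_shared_ptr p = { 2, 128 };
    int *addr = (int *) toy_sptr_to_addr (p);
    *addr = 42;                  /* a "relaxed put" in the toy model */
    printf ("wrote %d at thread %u, offset %zu\n",
            *addr, p.thread, p.offset);
    return 0;
  }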

Most of the libgupc/include/ directory contains the standard headers
defined by the UPC language specification.  'make install' will
install these headers in the directory where the other C header
files are located.
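
As a usage sketch only (not part of the patch), here is the kind of
small UPC program that these installed headers and runtime routines
support: upc_barrier and upc_threadof are implemented by
upc_barrier.upc and upc_addr.c below, and the shared reads and writes
compile down to the access routines in upc_access.c.

  #include <upc.h>
  #include <upc_relaxed.h>
  #include <stdio.h>

  shared int counts[THREADS];     /* one element with affinity to each thread */

  int
  main (void)
  {
    counts[MYTHREAD] = MYTHREAD;  /* relaxed shared write -> runtime put routine */
    upc_barrier;
    if (MYTHREAD == 0)
      {
        int t;
        for (t = 0; t < THREADS; ++t)
          printf ("counts[%d] has affinity to thread %d, value %d\n",
                  t, (int) upc_threadof (&counts[t]), counts[t]);
      }
    return 0;
  }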

2015-11-30  Gary Funck  <gary@intrepid.com>

	libgupc/smp/
	* upc_access.c: New.
	* upc_access.h: New.
	* upc_accessg.c: New.
	* upc_addr.c: New.
	* upc_affinity.c: New.
	* upc_affinity.h: New.
	* upc_affinity_stub.c: New.
	* upc_alloc.upc: New.
	* upc_allocg.upc: New.
	* upc_atomic.upc: New.
	* upc_backtrace.c: New.
	* upc_backtrace.h: New.
	* upc_barrier.upc: New.
	* upc_castable.upc: New.
	* upc_config.h: New.
	* upc_debug.c: New.
	* upc_debug.h: New.
	* upc_defs.h: New.
	* upc_gasp.c: New.
	* upc_gum.c: New.
	* upc_libat_lock.c: New.
	* upc_libat_lock.h: New.

Index: libgupc/smp/upc_access.c
===================================================================
--- libgupc/smp/upc_access.c	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_access.c	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,455 @@
+/* Copyright (C) 2001-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime Library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#include "upc_config.h"
+#include "upc_sysdep.h"
+#include "upc_defs.h"
+#include "upc_access.h"
+#include "upc_sync.h"
+#include "upc_sup.h"
+#include "upc_mem.h"
+
+//begin lib_inline_access
+
+__attribute__((__always_inline__))
+static inline
+void *
+__upc_access_sptr_to_addr (upc_shared_ptr_t p)
+{
+  if (GUPCR_PTS_IS_NULL (p))
+    __upc_fatal ("Invalid access via null shared pointer");
+  if ((int)GUPCR_PTS_THREAD(p) >= THREADS)
+    __upc_fatal ("Thread number in shared address is out of range");
+  return __upc_sptr_to_addr (p);
+}
+
+//inline
+u_intQI_t
+__getqi2 (upc_shared_ptr_t p)
+{
+  const u_intQI_t *addr = (u_intQI_t *) __upc_access_sptr_to_addr (p);
+  return *addr;
+}
+
+//inline
+u_intHI_t
+__gethi2 (upc_shared_ptr_t p)
+{
+  const u_intHI_t *addr = (u_intHI_t *) __upc_access_sptr_to_addr (p);
+  return *addr;
+}
+
+//inline
+u_intSI_t
+__getsi2 (upc_shared_ptr_t p)
+{
+  const u_intSI_t *addr = (u_intSI_t *) __upc_access_sptr_to_addr (p);
+  return *addr;
+}
+
+//inline
+u_intDI_t
+__getdi2 (upc_shared_ptr_t p)
+{
+  const u_intDI_t *addr = (u_intDI_t *) __upc_access_sptr_to_addr (p);
+  return *addr;
+}
+
+#if GUPCR_TARGET64
+//inline
+u_intTI_t
+__getti2 (upc_shared_ptr_t p)
+{
+  const u_intTI_t *addr = (u_intTI_t *) __upc_access_sptr_to_addr (p);
+  return *addr;
+}
+#endif /* GUPCR_TARGET64 */
+
+//inline
+float
+__getsf2 (upc_shared_ptr_t p)
+{
+  const float *addr = (float *) __upc_access_sptr_to_addr (p);
+  return *addr;
+}
+
+//inline
+double
+__getdf2 (upc_shared_ptr_t p)
+{
+  const double *addr = (double *) __upc_access_sptr_to_addr (p);
+  return *addr;
+}
+
+//inline
+long double
+__gettf2 (upc_shared_ptr_t p)
+{
+  const long double *addr = (long double *) __upc_access_sptr_to_addr (p);
+  return *addr;
+}
+
+//inline
+long double
+__getxf2 (upc_shared_ptr_t p)
+{
+  const long double *addr = (long double *) __upc_access_sptr_to_addr (p);
+  return *addr;
+}
+
+//inline
+void
+__getblk3 (void *dest, upc_shared_ptr_t src, size_t n)
+{
+  __upc_memget (dest, src, n);
+}
+
+//inline
+void
+__putqi2 (upc_shared_ptr_t p, u_intQI_t v)
+{
+  u_intQI_t * const addr = (u_intQI_t *) __upc_access_sptr_to_addr (p);
+  *addr = v;
+}
+
+//inline
+void
+__puthi2 (upc_shared_ptr_t p, u_intHI_t v)
+{
+  u_intHI_t * const addr = (u_intHI_t *) __upc_access_sptr_to_addr (p);
+  *addr = v;
+}
+
+//inline
+void
+__putsi2 (upc_shared_ptr_t p, u_intSI_t v)
+{
+  u_intSI_t * const addr = (u_intSI_t *) __upc_access_sptr_to_addr (p);
+  *addr = v;
+}
+
+//inline
+void
+__putdi2 (upc_shared_ptr_t p, u_intDI_t v)
+{
+  u_intDI_t * const addr = (u_intDI_t *) __upc_access_sptr_to_addr (p);
+  *addr = v;
+}
+
+#if GUPCR_TARGET64
+//inline
+void
+__putti2 (upc_shared_ptr_t p, u_intTI_t v)
+{
+  u_intTI_t * const addr = (u_intTI_t *) __upc_access_sptr_to_addr (p);
+  *addr = v;
+}
+#endif /* GUPCR_TARGET64 */
+
+//inline
+void
+__putsf2 (upc_shared_ptr_t p, float v)
+{
+  float * const addr = (float *) __upc_access_sptr_to_addr (p);
+  *addr = v;
+}
+
+//inline
+void
+__putdf2 (upc_shared_ptr_t p, double v)
+{
+  double * const addr = (double *) __upc_access_sptr_to_addr (p);
+  *addr = v;
+}
+
+//inline
+void
+__puttf2 (upc_shared_ptr_t p, long double v)
+{
+  long double * const addr = (long double *) __upc_access_sptr_to_addr (p);
+  *addr = v;
+}
+
+//inline
+void
+__putxf2 (upc_shared_ptr_t p, long double v)
+{
+  long double * const addr = (long double *) __upc_access_sptr_to_addr (p);
+  *addr = v;
+}
+
+//inline
+void
+__putblk3 (upc_shared_ptr_t dest, void *src, size_t n)
+{
+  __upc_memput (dest, src, n);
+}
+
+//inline
+void
+__copyblk3 (upc_shared_ptr_t dest, upc_shared_ptr_t src, size_t n)
+{
+  __upc_memcpy (dest, src, n);
+}
+
+/* Strict memory accesses. */
+
+//inline
+u_intQI_t
+__getsqi2 (upc_shared_ptr_t p)
+{
+  const u_intQI_t *addr = (u_intQI_t *) __upc_access_sptr_to_addr (p);
+  u_intQI_t result;
+  GUPCR_FENCE ();
+  result = *addr;
+  GUPCR_READ_FENCE ();
+  return result;
+}
+
+//inline
+u_intHI_t
+__getshi2 (upc_shared_ptr_t p)
+{
+  const u_intHI_t *addr = (u_intHI_t *) __upc_access_sptr_to_addr (p);
+  u_intHI_t result;
+  GUPCR_FENCE ();
+  result = *addr;
+  GUPCR_READ_FENCE ();
+  return result;
+}
+
+//inline
+u_intSI_t
+__getssi2 (upc_shared_ptr_t p)
+{
+  const u_intSI_t *addr = (u_intSI_t *) __upc_access_sptr_to_addr (p);
+  u_intSI_t result;
+  GUPCR_FENCE ();
+  result = *addr;
+  GUPCR_READ_FENCE ();
+  return result;
+}
+
+//inline
+u_intDI_t
+__getsdi2 (upc_shared_ptr_t p)
+{
+  const u_intDI_t *addr = (u_intDI_t *) __upc_access_sptr_to_addr (p);
+  u_intDI_t result;
+  GUPCR_FENCE ();
+  result = *addr;
+  GUPCR_READ_FENCE ();
+  return result;
+}
+
+#if GUPCR_TARGET64
+//inline
+u_intTI_t
+__getsti2 (upc_shared_ptr_t p)
+{
+  const u_intTI_t *addr = (u_intTI_t *) __upc_access_sptr_to_addr (p);
+  u_intTI_t result;
+  GUPCR_FENCE ();
+  result = *addr;
+  GUPCR_READ_FENCE ();
+  return result;
+}
+#endif /* GUPCR_TARGET64 */
+
+//inline
+float
+__getssf2 (upc_shared_ptr_t p)
+{
+  const float *addr = (float *) __upc_access_sptr_to_addr (p);
+  float result;
+  GUPCR_FENCE ();
+  result = *addr;
+  GUPCR_READ_FENCE ();
+  return result;
+}
+
+//inline
+double
+__getsdf2 (upc_shared_ptr_t p)
+{
+  const double *addr = (double *) __upc_access_sptr_to_addr (p);
+  double result;
+  GUPCR_FENCE ();
+  result = *addr;
+  GUPCR_READ_FENCE ();
+  return result;
+}
+
+//inline
+long double
+__getstf2 (upc_shared_ptr_t p)
+{
+  const long double *addr = (long double *) __upc_access_sptr_to_addr (p);
+  long double result;
+  GUPCR_FENCE ();
+  result = *addr;
+  GUPCR_READ_FENCE ();
+  return result;
+}
+
+//inline
+long double
+__getsxf2 (upc_shared_ptr_t p)
+{
+  const long double *addr = (long double *) __upc_access_sptr_to_addr (p);
+  long double result;
+  GUPCR_FENCE ();
+  result = *addr;
+  GUPCR_READ_FENCE ();
+  return result;
+}
+
+//inline
+void
+__getsblk3 (void *dest, upc_shared_ptr_t src, size_t len)
+{
+  GUPCR_FENCE ();
+  __getblk3 (dest, src, len);
+  GUPCR_READ_FENCE ();
+}
+
+//inline
+void
+__putsqi2 (upc_shared_ptr_t p, u_intQI_t v)
+{
+  u_intQI_t *addr = (u_intQI_t *) __upc_access_sptr_to_addr (p);
+  GUPCR_WRITE_FENCE ();
+  *addr = v;
+  GUPCR_FENCE ();
+}
+
+//inline
+void
+__putshi2 (upc_shared_ptr_t p, u_intHI_t v)
+{
+  u_intHI_t *addr = (u_intHI_t *) __upc_access_sptr_to_addr (p);
+  GUPCR_WRITE_FENCE ();
+  *addr = v;
+  GUPCR_FENCE ();
+}
+
+//inline
+void
+__putssi2 (upc_shared_ptr_t p, u_intSI_t v)
+{
+  u_intSI_t *addr = (u_intSI_t *) __upc_access_sptr_to_addr (p);
+  GUPCR_WRITE_FENCE ();
+  *addr = v;
+  GUPCR_FENCE ();
+}
+
+//inline
+void
+__putsdi2 (upc_shared_ptr_t p, u_intDI_t v)
+{
+  u_intDI_t *addr = (u_intDI_t *) __upc_access_sptr_to_addr (p);
+  GUPCR_WRITE_FENCE ();
+  *addr = v;
+  GUPCR_FENCE ();
+}
+
+#if GUPCR_TARGET64
+//inline
+void
+__putsti2 (upc_shared_ptr_t p, u_intTI_t v)
+{
+  u_intTI_t *addr = (u_intTI_t *) __upc_access_sptr_to_addr (p);
+  GUPCR_WRITE_FENCE ();
+  *addr = v;
+  GUPCR_FENCE ();
+}
+#endif /* GUPCR_TARGET64 */
+
+//inline
+void
+__putssf2 (upc_shared_ptr_t p, float v)
+{
+  float *addr = (float *) __upc_access_sptr_to_addr (p);
+  GUPCR_WRITE_FENCE ();
+  *addr = v;
+  GUPCR_FENCE ();
+}
+
+//inline
+void
+__putsdf2 (upc_shared_ptr_t p, double v)
+{
+  double *addr = (double *) __upc_access_sptr_to_addr (p);
+  GUPCR_WRITE_FENCE ();
+  *addr = v;
+  GUPCR_FENCE ();
+}
+
+//inline
+void
+__putstf2 (upc_shared_ptr_t p, long double v)
+{
+  long double *addr = (long double *) __upc_access_sptr_to_addr (p);
+  GUPCR_WRITE_FENCE ();
+  *addr = v;
+  GUPCR_FENCE ();
+}
+
+//inline
+void
+__putsxf2 (upc_shared_ptr_t p, long double v)
+{
+  long double *addr = (long double *) __upc_access_sptr_to_addr (p);
+  GUPCR_WRITE_FENCE ();
+  *addr = v;
+  GUPCR_FENCE ();
+}
+
+//inline
+void
+__putsblk3 (upc_shared_ptr_t dest, void *src, size_t len)
+{
+  GUPCR_WRITE_FENCE ();
+  __putblk3 (dest, src, len);
+  GUPCR_FENCE ();
+}
+
+//inline
+void
+__copysblk3 (upc_shared_ptr_t dest, upc_shared_ptr_t src, size_t len)
+{
+  GUPCR_WRITE_FENCE ();
+  __copyblk3 (dest, src, len);
+  GUPCR_FENCE ();
+}
+
+//inline
+void
+__upc_fence (void)
+{
+  GUPCR_FENCE ();
+}
+//end lib_inline_access
Index: libgupc/smp/upc_access.h
===================================================================
--- libgupc/smp/upc_access.h	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_access.h	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,180 @@
+/* Copyright (C) 2006-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+
+#ifndef _UPC_ACCESS_H_
+#define _UPC_ACCESS_H_
+
+
+//begin lib_access_prototypes
+/* relaxed accesses */
+
+extern u_intQI_t __getqi2 (upc_shared_ptr_t);
+extern u_intHI_t __gethi2 (upc_shared_ptr_t);
+extern u_intSI_t __getsi2 (upc_shared_ptr_t);
+extern u_intDI_t __getdi2 (upc_shared_ptr_t);
+#if GUPCR_TARGET64
+extern u_intTI_t __getti2 (upc_shared_ptr_t);
+#endif
+extern float __getsf2 (upc_shared_ptr_t);
+extern double __getdf2 (upc_shared_ptr_t);
+extern long double __gettf2 (upc_shared_ptr_t);
+extern long double __getxf2 (upc_shared_ptr_t);
+extern void __getblk3 (void *, upc_shared_ptr_t, size_t);
+
+extern void __putqi2 (upc_shared_ptr_t, u_intQI_t);
+extern void __puthi2 (upc_shared_ptr_t, u_intHI_t);
+extern void __putsi2 (upc_shared_ptr_t, u_intSI_t);
+extern void __putdi2 (upc_shared_ptr_t, u_intDI_t);
+#if GUPCR_TARGET64
+extern void __putti2 (upc_shared_ptr_t, u_intTI_t);
+#endif
+extern void __putsf2 (upc_shared_ptr_t, float);
+extern void __putdf2 (upc_shared_ptr_t, double);
+extern void __puttf2 (upc_shared_ptr_t, long double);
+extern void __putxf2 (upc_shared_ptr_t, long double);
+extern void __putblk3 (upc_shared_ptr_t, void *, size_t);
+extern void __copyblk3 (upc_shared_ptr_t, upc_shared_ptr_t, size_t);
+
+/* strict accesses */
+
+extern u_intQI_t __getsqi2 (upc_shared_ptr_t);
+extern u_intHI_t __getshi2 (upc_shared_ptr_t);
+extern u_intSI_t __getssi2 (upc_shared_ptr_t);
+extern u_intDI_t __getsdi2 (upc_shared_ptr_t);
+#if GUPCR_TARGET64
+extern u_intTI_t __getsti2 (upc_shared_ptr_t);
+#endif
+extern float __getssf2 (upc_shared_ptr_t);
+extern double __getsdf2 (upc_shared_ptr_t);
+extern long double __getstf2 (upc_shared_ptr_t);
+extern long double __getsxf2 (upc_shared_ptr_t);
+extern void __getsblk3 (void *, upc_shared_ptr_t, size_t);
+
+extern void __putsqi2 (upc_shared_ptr_t, u_intQI_t);
+extern void __putshi2 (upc_shared_ptr_t, u_intHI_t);
+extern void __putssi2 (upc_shared_ptr_t, u_intSI_t);
+extern void __putsdi2 (upc_shared_ptr_t, u_intDI_t);
+#if GUPCR_TARGET64
+extern void __putsti2 (upc_shared_ptr_t, u_intTI_t);
+#endif
+extern void __putssf2 (upc_shared_ptr_t, float);
+extern void __putsdf2 (upc_shared_ptr_t, double);
+extern void __putstf2 (upc_shared_ptr_t, long double);
+extern void __putsxf2 (upc_shared_ptr_t, long double);
+extern void __putsblk3 (upc_shared_ptr_t, void *, size_t);
+extern void __copysblk3 (upc_shared_ptr_t, upc_shared_ptr_t, size_t);
+
+/* relaxed accesses (profiled) */
+
+extern u_intQI_t __getgqi3 (upc_shared_ptr_t, const char *file, int line);
+extern u_intHI_t __getghi3 (upc_shared_ptr_t, const char *file, int line);
+extern u_intSI_t __getgsi3 (upc_shared_ptr_t, const char *file, int line);
+extern u_intDI_t __getgdi3 (upc_shared_ptr_t, const char *file, int line);
+#if GUPCR_TARGET64
+extern u_intTI_t __getgti3 (upc_shared_ptr_t, const char *file, int line);
+#endif
+extern float __getgsf3 (upc_shared_ptr_t, const char *file, int line);
+extern double __getgdf3 (upc_shared_ptr_t, const char *file, int line);
+extern long double __getgtf3 (upc_shared_ptr_t, const char *file, int line);
+extern long double __getgxf3 (upc_shared_ptr_t, const char *file, int line);
+extern void __getgblk5 (void *, upc_shared_ptr_t, size_t, const char *file,
+			int line);
+
+extern void __putgqi4 (upc_shared_ptr_t, u_intQI_t, const char *file,
+		       int line);
+extern void __putghi4 (upc_shared_ptr_t, u_intHI_t, const char *file,
+		       int line);
+extern void __putgsi4 (upc_shared_ptr_t, u_intSI_t, const char *file,
+		       int line);
+extern void __putgdi4 (upc_shared_ptr_t, u_intDI_t, const char *file,
+		       int line);
+#if GUPCR_TARGET64
+extern void __putgti4 (upc_shared_ptr_t, u_intTI_t, const char *file,
+		       int line);
+#endif
+extern void __putgsf4 (upc_shared_ptr_t, float, const char *file, int line);
+extern void __putgdf4 (upc_shared_ptr_t, double, const char *file, int line);
+extern void __putgtf4 (upc_shared_ptr_t, long double, const char *file, int line);
+extern void __putgxf4 (upc_shared_ptr_t, long double, const char *file, int line);
+extern void __putgblk5 (upc_shared_ptr_t, void *, size_t, const char *file,
+			int line);
+extern void __copygblk5 (upc_shared_ptr_t, upc_shared_ptr_t, size_t,
+			 const char *file, int line);
+
+/* strict accesses (profiled) */
+
+extern u_intQI_t __getsgqi3 (upc_shared_ptr_t, const char *file, int line);
+extern u_intHI_t __getsghi3 (upc_shared_ptr_t, const char *file, int line);
+extern u_intSI_t __getsgsi3 (upc_shared_ptr_t, const char *file, int line);
+extern u_intDI_t __getsgdi3 (upc_shared_ptr_t, const char *file, int line);
+#if GUPCR_TARGET64
+extern u_intTI_t __getsgti3 (upc_shared_ptr_t, const char *file, int line);
+#endif
+extern float __getsgsf3 (upc_shared_ptr_t, const char *file, int line);
+extern double __getsgdf3 (upc_shared_ptr_t, const char *file, int line);
+extern long double __getsgtf3 (upc_shared_ptr_t, const char *file, int line);
+extern long double __getsgxf3 (upc_shared_ptr_t, const char *file, int line);
+extern void __getsgblk5 (void *, upc_shared_ptr_t, size_t, const char *file,
+			 int line);
+
+extern void __putsgqi4 (upc_shared_ptr_t, u_intQI_t, const char *file,
+			int line);
+extern void __putsghi4 (upc_shared_ptr_t, u_intHI_t, const char *file,
+			int line);
+extern void __putsgsi4 (upc_shared_ptr_t, u_intSI_t, const char *file,
+			int line);
+extern void __putsgdi4 (upc_shared_ptr_t, u_intDI_t, const char *file,
+			int line);
+#if GUPCR_TARGET64
+extern void __putsgti4 (upc_shared_ptr_t, u_intTI_t, const char *file,
+			int line);
+#endif
+extern void __putsgsf4 (upc_shared_ptr_t, float, const char *file, int line);
+extern void __putsgdf4 (upc_shared_ptr_t, double, const char *file, int line);
+extern void __putsgtf4 (upc_shared_ptr_t, long double, const char *file, int line);
+extern void __putsgxf4 (upc_shared_ptr_t, long double, const char *file, int line);
+extern void __putsgblk5 (upc_shared_ptr_t, void *, size_t, const char *file,
+			 int line);
+extern void __copysgblk5 (upc_shared_ptr_t, upc_shared_ptr_t, size_t,
+			  const char *file, int line);
+
+/* Miscellaneous access related prototypes.  */
+extern void __upc_fence (void);
+
+//end lib_access_prototypes
+
+/* memory-to-memory operations (profiled) */
+extern void upc_memcpyg (upc_shared_ptr_t dest, upc_shared_ptr_t src,
+			 size_t n, const char *filename, int linenum);
+extern void upc_memgetg (void *dest, upc_shared_ptr_t src, size_t n,
+			 const char *filename, int linenum);
+extern void upc_memputg (upc_shared_ptr_t dest, const void *src, size_t n,
+			 const char *filename, int linenum);
+extern void upc_memsetg (upc_shared_ptr_t dest, int c, size_t n,
+			 const char *filename, int linenum);
+
+#endif /* _UPC_ACCESS_H_ */
Index: libgupc/smp/upc_accessg.c
===================================================================
--- libgupc/smp/upc_accessg.c	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_accessg.c	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,557 @@
+/* Copyright (C) 2009-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+
+#include "upc_config.h"
+#include "upc_sysdep.h"
+#include "upc_defs.h"
+#include "upc_access.h"
+#include "upc_lib.h"
+#include "gasp_upc.h"
+#include "upc_pupc.h"
+
+/* relaxed accesses (profiled) */
+
+u_intQI_t
+__getgqi3 (upc_shared_ptr_t p, const char *filename, int linenum)
+{
+  u_intQI_t val;
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  val = __getqi2 (p);
+  p_end (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  GUPCR_CLEAR_ERR_LOC();
+  return val;
+}
+
+u_intHI_t
+__getghi3 (upc_shared_ptr_t p, const char *filename, int linenum)
+{
+  u_intHI_t val;
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  val = __gethi2 (p);
+  p_end (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  GUPCR_CLEAR_ERR_LOC();
+  return val;
+}
+
+u_intSI_t
+__getgsi3 (upc_shared_ptr_t p, const char *filename, int linenum)
+{
+  u_intSI_t val;
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  val = __getsi2 (p);
+  p_end (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  GUPCR_CLEAR_ERR_LOC();
+  return val;
+}
+
+u_intDI_t
+__getgdi3 (upc_shared_ptr_t p, const char *filename, int linenum)
+{
+  u_intDI_t val;
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  val = __getdi2 (p);
+  p_end (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  GUPCR_CLEAR_ERR_LOC();
+  return val;
+}
+
+#if GUPCR_TARGET64
+u_intTI_t
+__getgti3 (upc_shared_ptr_t p, const char *filename, int linenum)
+{
+  u_intTI_t val;
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  val = __getti2 (p);
+  p_end (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  GUPCR_CLEAR_ERR_LOC();
+  return val;
+}
+#endif
+
+float
+__getgsf3 (upc_shared_ptr_t p, const char *filename, int linenum)
+{
+  float val;
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  val = __getsf2 (p);
+  p_end (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  GUPCR_CLEAR_ERR_LOC();
+  return val;
+}
+
+double
+__getgdf3 (upc_shared_ptr_t p, const char *filename, int linenum)
+{
+  double val;
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  val = __getdf2 (p);
+  p_end (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  GUPCR_CLEAR_ERR_LOC();
+  return val;
+}
+
+long double
+__getgtf3 (upc_shared_ptr_t p, const char *filename, int linenum)
+{
+  long double val;
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  val = __gettf2 (p);
+  p_end (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  GUPCR_CLEAR_ERR_LOC();
+  return val;
+}
+
+long double
+__getgxf3 (upc_shared_ptr_t p, const char *filename, int linenum)
+{
+  long double val;
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  val = __getxf2 (p);
+  p_end (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  GUPCR_CLEAR_ERR_LOC();
+  return val;
+}
+
+void
+__getgblk5 (void *dest, upc_shared_ptr_t src, size_t n, const char *filename,
+	    int linenum)
+{
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_GET, 1, dest, &src, n);
+  __getblk3 (dest, src, n);
+  p_end (GASP_UPC_GET, 1, dest, &src, n);
+  GUPCR_CLEAR_ERR_LOC();
+}
+
+void
+__putgqi4 (upc_shared_ptr_t p, u_intQI_t v, const char *filename, int linenum)
+{
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  __putqi2 (p, v);
+  p_end (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  GUPCR_CLEAR_ERR_LOC();
+}
+
+void
+__putghi4 (upc_shared_ptr_t p, u_intHI_t v, const char *filename, int linenum)
+{
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  __puthi2 (p, v);
+  p_end (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  GUPCR_CLEAR_ERR_LOC();
+}
+
+void
+__putgsi4 (upc_shared_ptr_t p, u_intSI_t v, const char *filename, int linenum)
+{
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  __putsi2 (p, v);
+  p_end (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  GUPCR_CLEAR_ERR_LOC();
+}
+
+void
+__putgdi4 (upc_shared_ptr_t p, u_intDI_t v, const char *filename, int linenum)
+{
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  __putdi2 (p, v);
+  p_end (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  GUPCR_CLEAR_ERR_LOC();
+}
+
+#if GUPCR_TARGET64
+void
+__putgti4 (upc_shared_ptr_t p, u_intTI_t v, const char *filename, int linenum)
+{
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  __putti2 (p, v);
+  p_end (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  GUPCR_CLEAR_ERR_LOC();
+}
+#endif
+
+void
+__putgsf4 (upc_shared_ptr_t p, float v, const char *filename, int linenum)
+{
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  __putsf2 (p, v);
+  p_end (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  GUPCR_CLEAR_ERR_LOC();
+}
+
+void
+__putgdf4 (upc_shared_ptr_t p, double v, const char *filename, int linenum)
+{
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  __putdf2 (p, v);
+  p_end (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  GUPCR_CLEAR_ERR_LOC();
+}
+
+void
+__putgtf4 (upc_shared_ptr_t p, long double v, const char *filename, int linenum)
+{
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  __puttf2 (p, v);
+  p_end (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  GUPCR_CLEAR_ERR_LOC();
+}
+
+void
+__putgxf4 (upc_shared_ptr_t p, long double v, const char *filename, int linenum)
+{
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  __putxf2 (p, v);
+  p_end (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  GUPCR_CLEAR_ERR_LOC();
+}
+
+void
+__putgblk5 (upc_shared_ptr_t dest, void *src, size_t n, const char *filename,
+	    int linenum)
+{
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_PUT, 1, &dest, src, n);
+  __putblk3 (dest, src, n);
+  p_end (GASP_UPC_PUT, 1, &dest, src, n);
+  GUPCR_CLEAR_ERR_LOC();
+}
+
+void
+__copygblk5 (upc_shared_ptr_t dest, upc_shared_ptr_t src, size_t n,
+	     const char *filename, int linenum)
+{
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_MEMCPY, &dest, &src, n);
+  __copyblk3 (dest, src, n);
+  p_end (GASP_UPC_MEMCPY, &dest, &src, n);
+  GUPCR_CLEAR_ERR_LOC();
+}
+
+/* strict accesses (profiled) */
+
+u_intQI_t
+__getsgqi3 (upc_shared_ptr_t p, const char *filename, int linenum)
+{
+  u_intQI_t val;
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  val = __getsqi2 (p);
+  p_end (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  GUPCR_CLEAR_ERR_LOC();
+  return val;
+}
+
+u_intHI_t
+__getsghi3 (upc_shared_ptr_t p, const char *filename, int linenum)
+{
+  u_intHI_t val;
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  val = __getshi2 (p);
+  p_end (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  GUPCR_CLEAR_ERR_LOC();
+  return val;
+}
+
+u_intSI_t
+__getsgsi3 (upc_shared_ptr_t p, const char *filename, int linenum)
+{
+  u_intSI_t val;
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  val = __getssi2 (p);
+  p_end (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  GUPCR_CLEAR_ERR_LOC();
+  return val;
+}
+
+u_intDI_t
+__getsgdi3 (upc_shared_ptr_t p, const char *filename, int linenum)
+{
+  u_intDI_t val;
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  val = __getsdi2 (p);
+  p_end (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  GUPCR_CLEAR_ERR_LOC();
+  return val;
+}
+
+#if GUPCR_TARGET64
+u_intTI_t
+__getsgti3 (upc_shared_ptr_t p, const char *filename, int linenum)
+{
+  u_intTI_t val;
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  val = __getsti2 (p);
+  p_end (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  GUPCR_CLEAR_ERR_LOC();
+  return val;
+}
+#endif
+
+float
+__getsgsf3 (upc_shared_ptr_t p, const char *filename, int linenum)
+{
+  float val;
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  val = __getssf2 (p);
+  p_end (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  GUPCR_CLEAR_ERR_LOC();
+  return val;
+}
+
+double
+__getsgdf3 (upc_shared_ptr_t p, const char *filename, int linenum)
+{
+  double val;
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  val = __getsdf2 (p);
+  p_end (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  GUPCR_CLEAR_ERR_LOC();
+  return val;
+}
+
+long double
+__getsgtf3 (upc_shared_ptr_t p, const char *filename, int linenum)
+{
+  long double val;
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  val = __getstf2 (p);
+  p_end (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  GUPCR_CLEAR_ERR_LOC();
+  return val;
+}
+
+long double
+__getsgxf3 (upc_shared_ptr_t p, const char *filename, int linenum)
+{
+  long double val;
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  val = __getsxf2 (p);
+  p_end (GASP_UPC_GET, 1, &val, &p, sizeof (val));
+  GUPCR_CLEAR_ERR_LOC();
+  return val;
+}
+
+void
+__getsgblk5 (void *dest, upc_shared_ptr_t src, size_t n, const char *filename,
+	     int linenum)
+{
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_GET, 1, dest, &src, n);
+  __getblk3 (dest, src, n);
+  p_end (GASP_UPC_GET, 1, dest, &src, n);
+  GUPCR_CLEAR_ERR_LOC();
+}
+
+void
+__putsgqi4 (upc_shared_ptr_t p, u_intQI_t v, const char *filename,
+	    int linenum)
+{
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  __putsqi2 (p, v);
+  p_end (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  GUPCR_CLEAR_ERR_LOC();
+}
+
+void
+__putsghi4 (upc_shared_ptr_t p, u_intHI_t v, const char *filename,
+	    int linenum)
+{
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  __putshi2 (p, v);
+  p_end (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  GUPCR_CLEAR_ERR_LOC();
+}
+
+void
+__putsgsi4 (upc_shared_ptr_t p, u_intSI_t v, const char *filename,
+	    int linenum)
+{
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  __putssi2 (p, v);
+  p_end (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  GUPCR_CLEAR_ERR_LOC();
+}
+
+void
+__putsgdi4 (upc_shared_ptr_t p, u_intDI_t v, const char *filename,
+	    int linenum)
+{
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  __putsdi2 (p, v);
+  p_end (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  GUPCR_CLEAR_ERR_LOC();
+}
+
+#if GUPCR_TARGET64
+void
+__putsgti4 (upc_shared_ptr_t p, u_intTI_t v, const char *filename,
+	    int linenum)
+{
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  __putsti2 (p, v);
+  p_end (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  GUPCR_CLEAR_ERR_LOC();
+}
+#endif
+
+void
+__putsgsf4 (upc_shared_ptr_t p, float v, const char *filename, int linenum)
+{
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  __putssf2 (p, v);
+  p_end (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  GUPCR_CLEAR_ERR_LOC();
+}
+
+void
+__putsgdf4 (upc_shared_ptr_t p, double v, const char *filename, int linenum)
+{
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  __putsdf2 (p, v);
+  p_end (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  GUPCR_CLEAR_ERR_LOC();
+}
+
+void
+__putsgtf4 (upc_shared_ptr_t p, long double v, const char *filename, int linenum)
+{
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  __putstf2 (p, v);
+  p_end (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  GUPCR_CLEAR_ERR_LOC();
+}
+
+void
+__putsgxf4 (upc_shared_ptr_t p, long double v, const char *filename, int linenum)
+{
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  __putsxf2 (p, v);
+  p_end (GASP_UPC_PUT, 1, &p, &v, sizeof (v));
+  GUPCR_CLEAR_ERR_LOC();
+}
+
+void
+__putsgblk5 (upc_shared_ptr_t dest, void *src, size_t n, const char *filename,
+	     int linenum)
+{
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_PUT, 0, &dest, src, n);
+  __putsblk3 (dest, src, n);
+  p_end (GASP_UPC_PUT, 0, &dest, src, n);
+  GUPCR_CLEAR_ERR_LOC();
+}
+
+void
+__copysgblk5 (upc_shared_ptr_t dest, upc_shared_ptr_t src, size_t n,
+	      const char *filename, int linenum)
+{
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_MEMCPY, &dest, &src, n);
+  __copysblk3 (dest, src, n);
+  p_end (GASP_UPC_MEMCPY, &dest, &src, n);
+  GUPCR_CLEAR_ERR_LOC();
+}
+
+void
+upc_memcpyg (upc_shared_ptr_t dest, upc_shared_ptr_t src, size_t n,
+	     const char *filename, int linenum)
+{
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_MEMCPY, &dest, &src, n);
+  upc_memcpy (dest, src, n);
+  p_end (GASP_UPC_MEMCPY, &dest, &src, n);
+  GUPCR_CLEAR_ERR_LOC();
+}
+
+void
+upc_memgetg (void *dest, upc_shared_ptr_t src, size_t n, const char *filename,
+	     int linenum)
+{
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_MEMGET, &dest, &src, n);
+  upc_memget (dest, src, n);
+  p_end (GASP_UPC_MEMGET, &dest, &src, n);
+  GUPCR_CLEAR_ERR_LOC();
+}
+
+void
+upc_memputg (upc_shared_ptr_t dest, const void *src, size_t n,
+	     const char *filename, int linenum)
+{
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_MEMPUT, &dest, src, n);
+  upc_memput (dest, src, n);
+  p_end (GASP_UPC_MEMPUT, &dest, src, n);
+  GUPCR_CLEAR_ERR_LOC();
+}
+
+void
+upc_memsetg (upc_shared_ptr_t dest, int c, size_t n, const char *filename,
+	     int linenum)
+{
+  GUPCR_SET_ERR_LOC();
+  p_start (GASP_UPC_MEMSET, &dest, c, n);
+  upc_memset (dest, c, n);
+  p_end (GASP_UPC_MEMSET, &dest, c, n);
+  GUPCR_CLEAR_ERR_LOC();
+}
Index: libgupc/smp/upc_addr.c
===================================================================
--- libgupc/smp/upc_addr.c	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_addr.c	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,121 @@
+/* Copyright (C) 2001-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime Library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#include "upc_config.h"
+#include "upc_sysdep.h"
+#include "upc_defs.h"
+#include "upc_sup.h"
+
+void *
+__cvtaddr (upc_shared_ptr_t p)
+{
+  upc_info_p u = __upc_info;
+  void *addr;
+  if (!u)
+    __upc_fatal ("UPC runtime not initialized");
+  if (GUPCR_PTS_IS_NULL (p))
+    return (void *) 0;
+  addr = __upc_sptr_to_addr (p);
+  return addr;
+}
+
+void *
+__getaddr (upc_shared_ptr_t p)
+{
+  upc_info_p u = __upc_info;
+  void *addr;
+  if (!u)
+    __upc_fatal ("UPC runtime not initialized");
+  if (GUPCR_PTS_IS_NULL (p))
+    return (void *) 0;
+  if ((int)GUPCR_PTS_THREAD(p) != MYTHREAD)
+    __upc_fatal ("Invalid conversion of shared address to local pointer;\nthread does not have affinity to shared address");
+  addr = __upc_sptr_to_addr (p);
+  return addr;
+}
+
+size_t
+upc_threadof (upc_shared_ptr_t p)
+{
+  upc_info_p u = __upc_info;
+  if (!u)
+    __upc_fatal ("UPC runtime not initialized");
+  if ((int)GUPCR_PTS_THREAD(p) >= THREADS)
+    __upc_fatal ("Thread number in shared address is out of range");
+  return GUPCR_PTS_THREAD (p);
+}
+
+size_t
+upc_phaseof (upc_shared_ptr_t p)
+{
+  upc_info_p u = __upc_info;
+  if (!u)
+    __upc_fatal ("UPC runtime not initialized");
+  if ((int)GUPCR_PTS_THREAD(p) >= THREADS)
+    __upc_fatal ("Thread number in shared address is out of range");
+  return GUPCR_PTS_PHASE (p);
+}
+
+upc_shared_ptr_t
+upc_resetphase (upc_shared_ptr_t p)
+{
+  upc_shared_ptr_t result;
+  result = p;
+  GUPCR_PTS_SET_PHASE (result, 0);
+  return result;
+}
+
+size_t
+upc_addrfield (upc_shared_ptr_t p)
+{
+  upc_info_p u = __upc_info;
+  if (!u)
+    __upc_fatal ("UPC runtime not initialized");
+  if ((int)GUPCR_PTS_THREAD(p) >= THREADS)
+    __upc_fatal ("Thread number in shared address is out of range");
+  return (size_t) GUPCR_PTS_VADDR (p);
+}
+
+size_t
+upc_affinitysize (size_t totalsize, size_t nbytes, size_t threadid)
+{ 
+  size_t result;
+  if (nbytes == 0 || totalsize == 0 || nbytes >= totalsize)
+      result = (size_t) (threadid == 0 ? totalsize : 0);
+  else
+    {
+      size_t const nblocks = (totalsize / nbytes);
+      size_t const cutoff = (nblocks % THREADS);
+      if (threadid < cutoff)
+	result = (size_t) ((nblocks + THREADS - 1) / THREADS) * nbytes;
+      else if (threadid > cutoff)
+	result = (size_t) (nblocks / THREADS) * nbytes;
+      else /* threadid == cutoff */
+	result = (size_t) ((nblocks / THREADS) * nbytes)
+			   + totalsize - nblocks * nbytes;
+    }
+  return result;
+}
Index: libgupc/smp/upc_affinity.c
===================================================================
--- libgupc/smp/upc_affinity.c	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_affinity.c	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,221 @@
+/* Copyright (C) 2008-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+
+#include "upc_config.h"
+#include "upc_sysdep.h"
+#include "upc_defs.h"
+#include "upc_sup.h"
+#include "upc_affinity.h"
+#include "upc_numa.h"
+
+/* The opaque type upc_cpu_avoid_t, forward references this type.  */
+struct upc_cpu_avoid_struct
+{
+  cpu_set_t cpu_set;
+};
+
+int
+__upc_affinity_supported ()
+{
+  return 1;
+}
+
+/* Calculate the right affinity for the thread based on 
+   the current scheduling and NUMA policies.  Executed by
+   the monitoring thread.  AVOID is a pointer to a data
+   structure that lists cpu's that aren't eligible for
+   allocation.  */
+
+int
+__upc_affinity_init (upc_info_p u, upc_cpu_avoid_p avoid,
+                     const char **err_msg)
+{
+  const upc_sched_policy_t sched_policy = u->sched_policy;
+  const int num_cpus = u->num_cpus;
+  const int num_nodes = u->num_nodes;
+  int t, next_sched_cpu;
+  if (!__upc_numa_init (u, err_msg))
+    return 0;
+  /* Calculate affinity for each thread. */
+  for (t = 0, next_sched_cpu = 0; t < THREADS; ++t)
+    {
+      const upc_thread_info_p tinfo = &u->thread_info[t];
+      int pelem, melem;
+      int sched_affinity, mem_affinity;
+      switch (sched_policy)
+	{
+	case GUPCR_SCHED_POLICY_CPU:
+	  /* One cpu = multiple threads */
+	  pelem = t % num_cpus;
+	  melem = pelem % num_nodes;
+	  sched_affinity = pelem;
+	  mem_affinity = melem;
+	  break;
+	case GUPCR_SCHED_POLICY_CPU_STRICT:
+	  /* One cpu = one thread */
+	  while (CPU_ISSET (next_sched_cpu, &avoid->cpu_set)
+		 && next_sched_cpu < num_cpus)
+	    {
+	      next_sched_cpu += 1;
+	    }
+	  if (next_sched_cpu >= num_cpus)
+	    {
+	      *err_msg = "UPC error: unable to allocate CPU for all threads.";
+	      return 0;
+	    }
+	  pelem = next_sched_cpu;
+	  melem = pelem % num_nodes;
+	  sched_affinity = pelem;
+	  mem_affinity = melem;
+	  next_sched_cpu += 1;
+	  break;
+	case GUPCR_SCHED_POLICY_NODE:
+	  /* Use NUMA for node scheduling */
+	  if (!__upc_numa_allocate (u, t, &sched_affinity, &mem_affinity,
+                                    err_msg))
+	    return 0;
+	  break;
+	default:
+	  /* Auto scheduling */
+	  sched_affinity = -1;
+	  mem_affinity = -1;
+	}
+      tinfo->sched_affinity = sched_affinity;
+      tinfo->mem_affinity = mem_affinity;
+    }
+  return 1;
+}
+
+/* Allocate, and return a pointer to the data structure used to record
+   the list of CPU's that are unavailable.  */
+
+upc_cpu_avoid_p
+__upc_affinity_cpu_avoid_new ()
+{
+  upc_cpu_avoid_p avoid;
+  avoid = (upc_cpu_avoid_p) calloc (1, sizeof (upc_cpu_avoid_t));
+  if (!avoid)
+    {
+      perror ("calloc");
+      abort ();
+    }
+  return avoid;
+}
+
+/* Free the previously allocated data structure that is used
+   to record list of CPU's that are unavailable.  */
+
+void
+__upc_affinity_cpu_avoid_free (const upc_cpu_avoid_p avoid)
+{
+  if (avoid)
+    free ((void *) avoid);
+}
+
+/* Mark CPU as being unavailable for allocation.  */
+
+void
+__upc_affinity_cpu_avoid_set (const int cpu, const upc_cpu_avoid_p avoid)
+{
+  CPU_SET (cpu, &avoid->cpu_set);
+}
+
+#ifdef DEBUG_AFFINITY
+static const char *
+upc_sched_policy_to_string (const upc_sched_policy_t sched_policy)
+{
+  switch (sched_policy)
+    {
+    case GUPCR_SCHED_POLICY_AUTO:
+      return "sched auto";
+    case GUPCR_SCHED_POLICY_NODE:
+      return "sched node";
+    case GUPCR_SCHED_POLICY_CPU:
+      return "sched cpu";
+    case GUPCR_SCHED_POLICY_CPU_STRICT:
+      return "sched strict";
+    }
+  return "sched <unknown>";
+}
+
+static const char *
+upc_mem_policy_to_string (const upc_mem_policy_t mem_policy)
+{
+  switch (mem_policy)
+    {
+    case GUPCR_MEM_POLICY_AUTO:
+      return "mem auto";
+    case GUPCR_MEM_POLICY_NODE:
+      return "mem node";
+    case GUPCR_MEM_POLICY_STRICT:
+      return "mem strict";
+    }
+  return "mem <unknown>";
+}
+#endif /* DEBUG_AFFINITY */
+
+/* Set thread's affinity based on the pre-calculated
+   policies. Executed by each thread as the first thing after thread
+   is created.  */
+
+void
+__upc_affinity_set (upc_info_p u, int thread_id)
+{
+  const upc_thread_info_p tinfo = &u->thread_info[thread_id];
+  switch (u->sched_policy)
+    {
+    case GUPCR_SCHED_POLICY_CPU:
+    case GUPCR_SCHED_POLICY_CPU_STRICT:
+      {
+	const int sched_affinity = tinfo->sched_affinity;
+	cpu_set_t set;
+	CPU_ZERO (&set);
+	CPU_SET (sched_affinity, &set);
+	if (sched_setaffinity (0, sizeof (set), &set))
+	  {
+	    __upc_fatal ("Scheduling cannot be set");
+	  }
+      }
+      break;
+    case GUPCR_SCHED_POLICY_NODE:
+      __upc_numa_sched_set (u, thread_id);
+      break;
+    default:
+      /* auto - no scheduling support */
+      break;
+    }
+  /* set memory policy only if we are not AUTO scheduling */
+  if ((u->sched_policy != GUPCR_SCHED_POLICY_AUTO) &&
+      (u->mem_policy != GUPCR_MEM_POLICY_AUTO))
+    __upc_numa_memory_affinity_set (u, thread_id);
+#ifdef DEBUG_AFFINITY
+  printf ("affinity: %d (%s,%s) scheduling (%d,%d)\n", thread_id,
+	  upc_sched_policy_to_string (u->sched_policy),
+	  upc_mem_policy_to_string (u->mem_policy),
+	  tinfo->sched_affinity, tinfo->mem_affinity);
+#endif /* DEBUG_AFFINITY */
+}
Index: libgupc/smp/upc_affinity.h
===================================================================
--- libgupc/smp/upc_affinity.h	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_affinity.h	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,39 @@
+/* Copyright (C) 2008-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+
+#ifndef _UPC_AFFINITY_H_
+#define _UPC_AFFINITY_H_
+
+extern void __upc_affinity_cpu_avoid_free (const upc_cpu_avoid_p);
+extern upc_cpu_avoid_p __upc_affinity_cpu_avoid_new (void);
+extern void __upc_affinity_cpu_avoid_set (const int, const upc_cpu_avoid_p);
+extern int __upc_affinity_init (const upc_info_p, const upc_cpu_avoid_p,
+				const char **err_msg);
+extern void __upc_affinity_set (const upc_info_p, const int);
+extern int __upc_affinity_supported (void);
+
+#endif /* !_UPC_AFFINITY_H_ */
Index: libgupc/smp/upc_affinity_stub.c
===================================================================
--- libgupc/smp/upc_affinity_stub.c	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_affinity_stub.c	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,68 @@
+/* Copyright (C) 2008-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+
+#include "upc_config.h"
+#include "upc_sysdep.h"
+#include "upc_defs.h"
+#include "upc_affinity.h"
+
+int
+__upc_affinity_supported (void)
+{
+  return 0;
+}
+
+int
+__upc_affinity_init (const upc_info_p ARG_UNUSED (u),
+                     const upc_cpu_avoid_p ARG_UNUSED (avoid),
+		     const char **ARG_UNUSED (err_msg))
+{
+  return 1;
+}
+
+upc_cpu_avoid_p
+__upc_affinity_cpu_avoid_new (void)
+{
+  return NULL;
+}
+
+void
+__upc_affinity_cpu_avoid_free (const upc_cpu_avoid_p ARG_UNUSED (avoid))
+{
+}
+
+void
+__upc_affinity_cpu_avoid_set (const int ARG_UNUSED (cpu),
+                              const upc_cpu_avoid_p ARG_UNUSED (avoid))
+{
+}
+
+void
+__upc_affinity_set (const upc_info_p ARG_UNUSED (u),
+                    const int ARG_UNUSED (thread_id))
+{
+}
Index: libgupc/smp/upc_alloc.upc
===================================================================
--- libgupc/smp/upc_alloc.upc	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_alloc.upc	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,435 @@
+/* Copyright (C) 2006-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+
+#include <upc.h>
+#ifdef __sgi__
+/* UPC's definitions conflict with definitions in SGI's
+   header files, which are included by upc_config.h.  */
+#undef barrier
+#undef fence
+#endif /* __sgi__ */
+#include <stdlib.h>
+#include <stddef.h>
+#include <stdio.h>
+
+#define DEBUG_ALLOC 1
+#undef DEBUG_ALLOC
+
+/* upc_alloc.upc implements UPC's dynamic memory allocation
+   routines.  The implementation is written in UPC, because
+   it needs to run above the runtime library's memory mapping
+   facility.  Internal runtime locks are used rather than
+   the UPC language-defined locks, because those locks
+   depend upon dynamic memory management, and we need to
+   break the circular dependency.  */
+
+typedef struct upc_heap_struct
+  {
+    shared struct upc_heap_struct *next;   /* MUST BE FIRST FIELD */
+    size_t size;
+    int alloc_tag;
+    int is_global;
+    int alloc_seq;
+  } upc_heap_t;
+typedef shared upc_heap_t *upc_heap_p;
+#define GUPCR_HEAP_OVERHEAD GUPCR_ROUND (sizeof (upc_heap_t), GUPCR_HEAP_ALLOC_MIN)
+
+static shared upc_heap_p __upc_global_heap;
+static shared upc_heap_p __upc_local_heap[THREADS];
+static shared void * shared __upc_all_alloc_val;
+static shared int __upc_alloc_seq;
+
+#undef NULL
+#define NULL (shared void *)0
+
+typedef union _pts_as_rep
+  {
+    shared void *pts;
+    upc_shared_ptr_t rep;
+  } pts_as_rep_t;
+
+/* Create a shared pointer, given (addrfield, thread)  */
+static inline
+shared void *
+__upc_alloc_build_pts (size_t addrfield, size_t thread)
+{
+  pts_as_rep_t r;
+  r.pts = NULL;
+  GUPCR_PTS_SET_VADDR  (r.rep, addrfield);
+  GUPCR_PTS_SET_THREAD (r.rep, thread);
+  return r.pts;
+}
+
+/* Increment a shared pointer, by nbytes */
+static inline
+shared void *
+__upc_alloc_ptr_add (shared void *ptr, ptrdiff_t nbytes)
+{
+  return (shared void *)(((shared [] char *)ptr) + nbytes);
+}
+
+#ifdef DEBUG_ALLOC
+static
+char *
+__upc_alloc_sptostr (shared void *p)
+{
+  static char s[100];
+  sprintf (s, "(0x%012lx,0x%02x,0x%016lx)",
+    (long unsigned int)upc_phaseof(p), (unsigned int)upc_threadof(p),
+    (long unsigned int)upc_addrfield(p));
+  return s;
+}
+#endif /* DEBUG_ALLOC */
+
+/* upc_heap_init() is called from the runtime to initially
+   create the heap.  Heap_base is the virtual address
+   of where the heap should begin, and heap_size is the
+   initial heap_size.  The caller has already allocated
+   the underlying space.  Note that the lower level
+   heap manager doesn't use locks -- all locking must
+   be done at a higher level.  */
+
+void
+__upc_heap_init (upc_shared_ptr_t heap_base, size_t heap_size)
+{
+  int t;
+  upc_heap_p heap;
+  heap = *((upc_heap_p *)&heap_base);
+  upc_memset (heap, '\0', sizeof (upc_heap_t));
+  __upc_alloc_seq = 0;
+  /* the size of each free list entry includes its overhead. */
+  heap->size = heap_size;
+  heap->next = NULL;
+  heap->is_global = 1;
+  heap->alloc_seq = ++__upc_alloc_seq;
+  __upc_global_heap = heap;
+  for (t = 0; t < THREADS; ++t)
+    __upc_local_heap[t] = NULL;
+}
+
+/* Allocate a block of size 'alloc_size' identified indirectly
+   via 'heap_p'.  'alloc_size' must include the heap overhead.
+   The 'global_flag' is simply copied into the newly allocated
+   heap node.  A pointer to the heap node is returned.  */
+
+static
+upc_heap_p
+__upc_heap_alloc (shared upc_heap_p *heap_p, size_t alloc_size,
+                    int global_flag)
+{
+  shared upc_heap_p *p;
+  upc_heap_p alloc;
+#ifdef DEBUG_ALLOC
+  printf ("%d: --> __upc_heap_alloc (%ld): heap on entry\n", MYTHREAD, (long int) alloc_size);
+  for (p = heap_p; *p; p = (shared upc_heap_p *)&(*p)->next)
+    printf("%d: addr: %s size: %ld global: %d seq: %d\n", MYTHREAD, __upc_alloc_sptostr(*p),(long int)(*p)->size,(*p)->is_global,(*p)->alloc_seq);
+#endif /* DEBUG_ALLOC */
+  for (p = heap_p; *p && ((*p)->size < alloc_size);
+       p = (shared upc_heap_p *)&(*p)->next) /* loop */ ;
+  alloc = *p;
+  if (alloc)
+    {
+      size_t this_size = alloc->size;
+      size_t rem = this_size - alloc_size;
+      alloc->is_global = global_flag;
+      alloc->alloc_tag = GUPCR_HEAP_ALLOC_TAG;
+      /* make sure the remaining fragment meets min. size requirement */
+      if (rem < (GUPCR_HEAP_ALLOC_MIN + GUPCR_HEAP_OVERHEAD))
+	{
+ 	  alloc_size = this_size;
+	  rem = 0;
+	}
+      alloc->size = alloc_size;
+      if (rem > 0)
+	{
+	  /* link the remainder onto the free list */
+	  upc_heap_p frag = __upc_alloc_ptr_add (alloc, alloc_size);
+	  frag->next = alloc->next;
+	  frag->alloc_seq = alloc->alloc_seq;
+	  frag->is_global = alloc->is_global;
+	  frag->alloc_tag = 0;
+	  frag->size = rem;
+	  *p = frag;
+	}
+      else
+	{
+	  /* entry exactly fits, delink this free list entry */
+	  *p = alloc->next;
+	}
+#ifdef DEBUG_ALLOC
+  printf ("%d:   __upc_heap_alloc: heap on exit\n", MYTHREAD);
+  for (p = heap_p; *p; p = ( shared upc_heap_p *)&(*p)->next)
+    printf("%d: addr: %s size: %ld global: %d seq: %d\n",MYTHREAD,__upc_alloc_sptostr(*p),(long int)(*p)->size,(*p)->is_global,(*p)->alloc_seq);
+#endif /* DEBUG_ALLOC */
+    }
+#ifdef DEBUG_ALLOC
+  printf ("%d: <- __upc_heap_alloc: %s\n", MYTHREAD, __upc_alloc_sptostr (alloc));
+#endif /* DEBUG_ALLOC */
+  return alloc;
+}
+
+static
+void
+__upc_heap_free (shared upc_heap_p *heap_p, upc_heap_p ptr)
+{
+  shared upc_heap_p *p;
+  upc_heap_p prev;
+#ifdef DEBUG_ALLOC
+  printf ("%d: --> __upc_heap_free: ", MYTHREAD);
+  printf("%d: addr: %s size: %ld global: %d seq: %d\n", MYTHREAD,
+    __upc_alloc_sptostr(ptr),(long int)ptr->size,ptr->is_global,ptr->alloc_seq);
+  printf ("%d:   heap on entry\n", MYTHREAD);
+  for (p = heap_p; *p; p = ( shared upc_heap_p *)&(*p)->next)
+    printf("%d: addr: %s size: %ld global: %d seq: %d\n", MYTHREAD, __upc_alloc_sptostr(*p),(long int)(*p)->size,(*p)->is_global,(*p)->alloc_seq);
+#endif /* DEBUG_ALLOC */
+  for (p = heap_p, prev = NULL; *p && (ptr > *p);
+       prev = *p, p = (shared upc_heap_p *)&(*p)->next) /* loop */ ;
+  ptr->alloc_tag = 0;
+  ptr->next = *p;
+  *p = ptr;
+  if (ptr->next && (ptr->next == __upc_alloc_ptr_add (ptr, ptr->size))
+      && (ptr->alloc_seq == ptr->next->alloc_seq))
+    {
+      /* Adjacent; merge this block with the next.  */
+      ptr->size += ptr->next->size;
+      ptr->next = ptr->next->next;
+    }
+  if (prev && (ptr  == __upc_alloc_ptr_add (prev, prev->size))
+      && (ptr->alloc_seq == prev->alloc_seq))
+    {
+      /* Adjacent; merge this block with the previous.  */
+      prev->size += ptr->size;
+      prev->next = ptr->next;
+    }
+#ifdef DEBUG_ALLOC
+  printf ("%d: <- __upc_heap_free: heap on exit\n", MYTHREAD);
+  for (p = heap_p; *p; p = (shared upc_heap_p *)&(*p)->next)
+    printf ("%d: addr: %s size: %ld global: %d seq: %d\n", MYTHREAD, __upc_alloc_sptostr (*p), (long int) (*p)->size, (*p)->is_global, (*p)->alloc_seq);
+#endif /* DEBUG_ALLOC */
+}
+
+
+/* Allocate a block of size 'alloc_size' from the global heap.
+   Extend the heap if more space is needed.  'alloc_size' is
+   the size of the heap node returned, inclusive of overhead.  */
+
+static
+upc_heap_p
+__upc_global_heap_alloc (size_t alloc_size)
+{
+  shared upc_heap_p *heap_p = &__upc_global_heap;
+  upc_heap_p alloc;
+#ifdef DEBUG_ALLOC
+  printf ("%d: -> __upc_global_heap_alloc (%ld)\n", MYTHREAD, (long int)alloc_size);
+#endif /* DEBUG_ALLOC */
+  alloc = __upc_heap_alloc (heap_p, alloc_size, 1);
+  if (!alloc)
+    {
+      /* Extend the heap.  */
+      const size_t chunk_size = GUPCR_ROUND (alloc_size,
+                                          GUPCR_HEAP_CHUNK_SIZE);
+      const size_t vm_alloc_size = GUPCR_ROUND (chunk_size, GUPCR_VM_PAGE_SIZE);
+      const upc_page_num_t vm_alloc_pages = vm_alloc_size / GUPCR_VM_PAGE_SIZE;
+      const upc_page_num_t cur_page_alloc = __upc_vm_get_cur_page_alloc ();
+      const size_t new_alloc_base = (size_t)cur_page_alloc * GUPCR_VM_PAGE_SIZE;
+      const upc_heap_p new_alloc = __upc_alloc_build_pts (new_alloc_base, 0);
+#ifdef DEBUG_ALLOC
+      printf ("%d: __upc_global_heap_alloc: extend heap by %d pages\n",
+         MYTHREAD, vm_alloc_pages);
+#endif /* DEBUG_ALLOC */
+      if (!__upc_vm_alloc (vm_alloc_pages))
+        return NULL;
+      upc_memset (new_alloc, '\0', sizeof (upc_heap_t));
+      new_alloc->size = vm_alloc_size;
+      new_alloc->next = NULL;
+      new_alloc->is_global = 1;
+      new_alloc->alloc_seq = ++__upc_alloc_seq;
+      /* Return the newly allocated space to the heap.  */
+      __upc_heap_free (heap_p, new_alloc);
+      alloc = __upc_heap_alloc (heap_p, alloc_size, 1);
+      if (!alloc)
+        __upc_fatal ("insufficient UPC dynamic shared memory");
+    }
+#ifdef DEBUG_ALLOC
+  printf ("%d: <- __upc_global_heap_alloc: %s\n", MYTHREAD, __upc_alloc_sptostr (alloc));
+#endif /* DEBUG_ALLOC */
+  return alloc;
+}
+
+static
+shared void *
+__upc_global_alloc (size_t size)
+{
+  shared void *mem = NULL;
+  if (size)
+    {
+      const size_t alloc_size = GUPCR_ROUND (size + GUPCR_HEAP_OVERHEAD,
+                                          GUPCR_HEAP_ALLOC_MIN);
+      upc_heap_p alloc;
+      __upc_acquire_alloc_lock ();
+      alloc = __upc_global_heap_alloc (alloc_size);
+      __upc_release_alloc_lock ();
+      if (alloc)
+        mem = __upc_alloc_ptr_add (alloc, GUPCR_HEAP_OVERHEAD);
+#ifdef DEBUG_ALLOC
+      printf ("%d: <- __upc_global_alloc: %s\n", MYTHREAD, __upc_alloc_sptostr(mem));
+#endif /* DEBUG_ALLOC */
+    }
+  return mem;
+}
+
+static
+inline
+shared void *
+__upc_local_alloc (size_t size)
+{
+  shared void *mem = NULL;
+#ifdef DEBUG_ALLOC
+  printf ("%d: --> __upc_local_alloc (%ld)\n", MYTHREAD, (long int) size);
+#endif /* DEBUG_ALLOC */
+  if (size)
+    {
+      const size_t alloc_size = GUPCR_ROUND (size + GUPCR_HEAP_OVERHEAD,
+                                          GUPCR_HEAP_ALLOC_MIN);
+      shared upc_heap_p *heap_p = &__upc_local_heap[MYTHREAD];
+      upc_heap_p alloc;
+      __upc_acquire_alloc_lock ();
+      alloc = __upc_heap_alloc (heap_p, alloc_size, 0);
+      if (!alloc)
+	{
+	  int chunk_seq;
+	  int t;
+	  size_t chunk_size = GUPCR_ROUND (size + GUPCR_HEAP_OVERHEAD,
+                                                  GUPCR_HEAP_CHUNK_SIZE);
+	  upc_heap_p chunk = __upc_global_heap_alloc (chunk_size);
+	  if (!chunk)
+	    {
+	      /* Release the allocation lock before failing, otherwise
+	         the lock would be left held on this error path.  */
+	      __upc_release_alloc_lock ();
+	      return NULL;
+	    }
+	  chunk_size = chunk->size;
+	  chunk_seq = chunk->alloc_seq;
+	  /* Distribute this chunk over each local free list.  */
+	  for (t = 0; t < THREADS; ++t)
+	    {
+	      shared upc_heap_p *local_heap_p = &__upc_local_heap[t];
+	      /* Set the thread to 't' so that we can link
+	         this chunk onto the thread's local heap.  */
+	      upc_heap_p local_chunk = __upc_alloc_build_pts (
+		                          upc_addrfield (chunk), t);
+	      upc_fence;
+	      /* Add this local chunk onto the local free list.  */
+	      upc_memset (local_chunk, '\0', sizeof (upc_heap_t));
+	      local_chunk->size = chunk_size;
+	      local_chunk->alloc_seq = chunk_seq;
+	      __upc_heap_free (local_heap_p, local_chunk);
+	    }
+	  alloc = __upc_heap_alloc (heap_p, alloc_size, 0);
+	}
+      __upc_release_alloc_lock ();
+      if (alloc)
+        mem = __upc_alloc_ptr_add (alloc, GUPCR_HEAP_OVERHEAD);
+    }
+#ifdef DEBUG_ALLOC
+  printf ("%d: <-- __upc_local_alloc: %s\n", MYTHREAD, __upc_alloc_sptostr (mem));
+#endif /* DEBUG_ALLOC */
+  return mem;
+}
+
+shared void *
+upc_global_alloc (size_t nblocks, size_t nbytes)
+{
+  size_t request_size = GUPCR_ROUND (nblocks, THREADS) * nbytes;
+  size_t alloc_size = request_size / THREADS;
+  shared void *mem = __upc_global_alloc (alloc_size);
+  return mem;
+}
+
+shared void *
+upc_all_alloc (size_t nblocks, size_t nbytes)
+{
+  size_t request_size = GUPCR_ROUND (nblocks, THREADS) * nbytes;
+  size_t alloc_size = request_size / THREADS;
+  shared void *mem = NULL;
+  if (alloc_size)
+    {
+      upc_barrier -1;
+      if (MYTHREAD == 0)
+        __upc_all_alloc_val = __upc_global_alloc (alloc_size);
+      upc_barrier -1;
+      mem = __upc_all_alloc_val;
+    }
+  return mem;
+}
+
+shared void *
+upc_alloc (size_t nbytes)
+{
+  shared void *mem = NULL; 
+  if (nbytes)
+    mem = __upc_local_alloc (nbytes);
+  return mem;
+}
+
+void
+upc_all_free (shared void *ptr)
+{
+  if (ptr)
+    {
+      const int thread = (int)upc_threadof (ptr);
+      upc_barrier -1;
+      /* Check for errors only on thread 0.  */
+      if ((MYTHREAD == 0) && (thread >= THREADS))
+        __upc_fatal ("upc_all_free() called with invalid shared pointer");
+      if (thread == MYTHREAD)
+        upc_free (ptr);
+    }
+}
+
+void
+upc_free (shared void *ptr)
+{
+  if (ptr)
+    {
+      const size_t offset __attribute__ ((unused)) = upc_addrfield (ptr);
+      const int thread = (int)upc_threadof (ptr);
+      const size_t phase = upc_phaseof (ptr);
+      shared upc_heap_p *heap_p;
+      upc_heap_p thisp;
+      if (phase || thread >= THREADS)
+        __upc_fatal ("upc_free() called with invalid shared pointer");
+      thisp = (upc_heap_p) __upc_alloc_ptr_add (ptr, -GUPCR_HEAP_OVERHEAD);
+      if (thisp->is_global && thread)
+        __upc_fatal ("upc_free() called with invalid shared pointer");
+      if (thisp->alloc_tag != GUPCR_HEAP_ALLOC_TAG)
+	__upc_fatal ("upc_free() called with pointer to unallocated space");
+      if (thisp->is_global)
+        heap_p = (shared upc_heap_p *)&__upc_global_heap;
+      else
+        heap_p = (shared upc_heap_p *)&__upc_local_heap[thread];
+      __upc_acquire_alloc_lock ();
+      __upc_heap_free (heap_p, thisp);
+      __upc_release_alloc_lock ();
+    }
+}
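
[NOTE (reviewer commentary, not part of the patch): the allocator above keeps
one free list per heap, ordered by address.  __upc_heap_alloc is first-fit: it
takes the first block that is large enough, splits off the remainder when the
fragment would still satisfy the minimum block size, and otherwise hands back
the whole block.  __upc_heap_free reinserts a block in address order and
coalesces it with an adjacent neighbor that carries the same allocation
sequence number.  The public entry points differ only in how they size the
request; the stand-alone sketch below illustrates the arithmetic used by
upc_global_alloc, assuming (as the uses above suggest) that GUPCR_ROUND (x, r)
rounds x up to a multiple of r.  ROUND_UP is a stand-in for that assumed
behavior, not something defined by the patch.

    /* Illustrative only: per-thread block size as computed by
       upc_global_alloc (nblocks, nbytes).  */
    #include <stddef.h>
    #include <stdio.h>

    #define ROUND_UP(x, r) ((((x) + (r) - 1) / (r)) * (r))

    int
    main (void)
    {
      const size_t threads = 4;               /* example THREADS value */
      const size_t nblocks = 10, nbytes = 16;
      /* Round the block count up to a multiple of the thread count, then
         split the total request evenly across threads:
         ROUND_UP (10, 4) * 16 / 4 = 12 * 16 / 4 = 48 bytes per thread.  */
      const size_t request_size = ROUND_UP (nblocks, threads) * nbytes;
      const size_t alloc_size = request_size / threads;
      printf ("per-thread block size: %zu bytes\n", alloc_size);
      return 0;
    }

Every thread thus contributes the same amount of shared storage; upc_all_alloc
performs the same computation, lets thread 0 do the actual allocation between
two barriers, and publishes the result through __upc_all_alloc_val.]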
Index: libgupc/smp/upc_allocg.upc
===================================================================
--- libgupc/smp/upc_allocg.upc	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_allocg.upc	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,115 @@
+/* Copyright (C) 2009-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+
+#include <upc.h>
+#include "gasp_upc.h"
+#include "upc_pupc.h"
+
+#ifndef NULL
+#define NULL (void *)0
+#endif
+
+/* The filename of the location where a runtime
+   error was detected.  This is set by the various
+   debug-enabled ('g') UPC runtime library routines.  */
+extern GUPCR_THREAD_LOCAL const char *__upc_err_filename;
+
+/* The line number of the location where a runtime
+   error was detected.  This is set by the various
+   debug-enabled ('g') UPC runtime library routines.  */
+extern GUPCR_THREAD_LOCAL unsigned int __upc_err_linenum;
+
+#define GUPCR_SET_ERR_LOC() \
+  do \
+    { \
+      __upc_err_filename = filename; \
+      __upc_err_linenum  = linenum; \
+    } while (0)
+
+#define GUPCR_CLEAR_ERR_LOC() \
+  do \
+    { \
+      __upc_err_filename = NULL; \
+      __upc_err_linenum  = 0; \
+    } while (0)
+
+shared void *
+upc_global_allocg (size_t nblocks, size_t nbytes, const char *filename,
+		   int linenum)
+{
+  shared void *result;
+  p_start (GASP_UPC_GLOBAL_ALLOC, nblocks, nbytes);
+  GUPCR_SET_ERR_LOC();
+  result = upc_global_alloc (nblocks, nbytes);
+  GUPCR_CLEAR_ERR_LOC();
+  p_end (GASP_UPC_GLOBAL_ALLOC, nblocks, nbytes, &result);
+  return result;
+}
+
+shared void *
+upc_all_allocg (size_t nblocks, size_t nbytes, const char *filename, int linenum)
+{
+  shared void *result;
+  p_start (GASP_UPC_ALL_ALLOC, nblocks, nbytes);
+  GUPCR_SET_ERR_LOC();
+  result = upc_all_alloc (nblocks, nbytes);
+  GUPCR_CLEAR_ERR_LOC();
+  p_end (GASP_UPC_ALL_ALLOC, nblocks, nbytes, &result);
+  return result;
+}
+
+shared void *
+upc_allocg (size_t nbytes, const char *filename, int linenum)
+{
+  shared void *val;
+  p_start (GASP_UPC_ALLOC, nbytes);
+  GUPCR_SET_ERR_LOC();
+  val = upc_alloc (nbytes);
+  GUPCR_CLEAR_ERR_LOC();
+  p_end (GASP_UPC_ALLOC, nbytes, &val);
+  return val;
+}
+
+void
+upc_freeg (shared void *ptr, const char *filename, int linenum)
+{
+  p_start (GASP_UPC_FREE, &ptr);
+  GUPCR_SET_ERR_LOC();
+  upc_free (ptr);
+  GUPCR_CLEAR_ERR_LOC();
+  p_end (GASP_UPC_FREE, &ptr);
+}
+
+void
+upc_all_freeg (shared void *ptr, const char *filename, int linenum)
+{
+  p_start (GASP_UPC_FREE, &ptr);
+  GUPCR_SET_ERR_LOC();
+  upc_all_free (ptr);
+  GUPCR_CLEAR_ERR_LOC();
+  p_end (GASP_UPC_FREE, &ptr);
+}
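
[NOTE (reviewer commentary, not part of the patch): upc_allocg.upc supplies
the instrumented/debug ('g') variants of the allocation entry points.  Each
wrapper records the caller's file and line for runtime error reporting,
brackets the underlying call with GASP p_start/p_end events, and then clears
the recorded location.  The stand-alone sketch below shows only that wrapper
shape; event_start/event_end and plain malloc are hypothetical stand-ins for
p_start/p_end and the UPC allocator.

    /* Plain-C sketch of the '*g' wrapper pattern; illustrative only.  */
    #include <stdio.h>
    #include <stdlib.h>

    static const char *err_filename;     /* cf. __upc_err_filename */
    static unsigned int err_linenum;     /* cf. __upc_err_linenum */

    static void event_start (const char *op) { printf ("start %s\n", op); }
    static void event_end (const char *op) { printf ("end %s\n", op); }

    static void *
    alloc_g (size_t nbytes, const char *filename, unsigned int linenum)
    {
      void *val;
      event_start ("alloc");
      err_filename = filename;           /* record the caller's location */
      err_linenum = linenum;
      val = malloc (nbytes);             /* the wrapped operation */
      err_filename = NULL;               /* clear it again on the way out */
      err_linenum = 0;
      event_end ("alloc");
      return val;
    }

    int
    main (void)
    {
      void *p = alloc_g (64, __FILE__, __LINE__);
      free (p);
      return 0;
    }
]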
Index: libgupc/smp/upc_atomic.upc
===================================================================
--- libgupc/smp/upc_atomic.upc	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_atomic.upc	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,1872 @@
+/* Process the definitions file with autogen to produce upc_atomic.upc:
+
+   autogen -L ../include upc_atomic.def
+
+   Copyright (C) 2013-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime Library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#include <upc.h>
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <upc_atomic.h>
+#include "upc_config.h"
+
+/**
+ * @file upc_atomic.upc
+ * GUPC SMP UPC atomics implementation.
+ */
+
+/**
+ * @addtogroup ATOMIC GUPCR Atomics Functions
+ * @{
+ */
+
+/** Atomic domain representation */
+struct upc_atomicdomain_struct
+{
+  upc_op_t ops;
+  upc_type_t optype;
+};
+
+/* Represent a bit-encoded operation as an integer.  */
+typedef unsigned int upc_op_num_t;
+
+
+typedef int I_type;
+typedef unsigned int UI_type;
+typedef long L_type;
+typedef unsigned long UL_type;
+typedef long long LL_type;
+typedef unsigned long long ULL_type;
+typedef int32_t I32_type;
+typedef uint32_t UI32_type;
+typedef int64_t I64_type;
+typedef uint64_t UI64_type;
+typedef float F_type;
+typedef double D_type;
+typedef shared void * PTS_type;
+
+
+#define ATOMIC_ACCESS_OPS (UPC_GET | UPC_SET | UPC_CSWAP)
+
+#define ATOMIC_NUM_OPS (UPC_ADD | UPC_MULT | UPC_MIN | UPC_MAX | UPC_SUB | UPC_INC | UPC_DEC)
+
+#define ATOMIC_BIT_OPS (UPC_AND | UPC_OR | UPC_XOR)
+#define ATOMIC_ALL_OPS (ATOMIC_ACCESS_OPS | ATOMIC_NUM_OPS \
+                        | ATOMIC_BIT_OPS)
+
+/**
+ * Check if OP is a valid atomic operation.
+ *
+ * @param [in] op UPC atomic operation
+ * @retval TRUE if op is a valid atomic operation
+ */
+static inline bool
+__upc_atomic_is_valid_op (upc_op_t op)
+{
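+  /* 'op & ~(-op)' is the same as 'op & (op - 1)': it clears the lowest
+     set bit of OP, so it is nonzero iff OP has more than one bit set.
+     The second term is nonzero iff OP has a bit outside ATOMIC_ALL_OPS.
+     OP is valid when it encodes exactly one supported operation.  */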
+  return !((op & ~(-op)) || (op & ~ATOMIC_ALL_OPS));
+}
+
+/**
+ * Convert the bit-encoded OP into an integer.
+ *
+ * @param [in] op UPC atomic operation
+ * @retval op represented as integer index
+ *  (UPC_ADD_OP, UPC_MULT_OP ...)
+ */
+static inline upc_op_num_t
+__upc_atomic_op_num (upc_op_t op)
+{
+  return (LONG_LONG_BITS - 1) - __builtin_clzll ((long long) op);
+}
+
+/**
+ * Check if UPC_TYPE is a valid atomic operation type.
+ *
+ * @param [in] upc_type UPC atomic type
+ * @retval TRUE if atomic operations are supported on UPC_TYPE
+ */
+static bool
+__upc_atomic_is_valid_type (upc_type_t upc_type)
+{
+  switch (upc_type)
+    {
+    case UPC_INT:
+    case UPC_UINT:
+    case UPC_LONG:
+    case UPC_ULONG:
+    case UPC_LLONG:
+    case UPC_ULLONG:
+    case UPC_INT32:
+    case UPC_UINT32:
+    case UPC_INT64:
+    case UPC_UINT64:
+    case UPC_FLOAT:
+    case UPC_DOUBLE:
+    case UPC_PTS:
+      return true;
+    default: break;
+    }
+  return false;
+}
+
+/**
+ * Return the atomic operations supported for type UPC_TYPE.
+ *
+ * @param [in] upc_type UPC atomic type
+ * @retval bit vector of supported atomic operations.
+ */
+static upc_op_t
+__upc_atomic_supported_ops (upc_type_t upc_type)
+{
+  switch (upc_type)
+    {
+    case UPC_INT:
+      return ATOMIC_ACCESS_OPS | ATOMIC_NUM_OPS | ATOMIC_BIT_OPS;
+    case UPC_UINT:
+      return ATOMIC_ACCESS_OPS | ATOMIC_NUM_OPS | ATOMIC_BIT_OPS;
+    case UPC_LONG:
+      return ATOMIC_ACCESS_OPS | ATOMIC_NUM_OPS | ATOMIC_BIT_OPS;
+    case UPC_ULONG:
+      return ATOMIC_ACCESS_OPS | ATOMIC_NUM_OPS | ATOMIC_BIT_OPS;
+    case UPC_LLONG:
+      return ATOMIC_ACCESS_OPS | ATOMIC_NUM_OPS | ATOMIC_BIT_OPS;
+    case UPC_ULLONG:
+      return ATOMIC_ACCESS_OPS | ATOMIC_NUM_OPS | ATOMIC_BIT_OPS;
+    case UPC_INT32:
+      return ATOMIC_ACCESS_OPS | ATOMIC_NUM_OPS | ATOMIC_BIT_OPS;
+    case UPC_UINT32:
+      return ATOMIC_ACCESS_OPS | ATOMIC_NUM_OPS | ATOMIC_BIT_OPS;
+    case UPC_INT64:
+      return ATOMIC_ACCESS_OPS | ATOMIC_NUM_OPS | ATOMIC_BIT_OPS;
+    case UPC_UINT64:
+      return ATOMIC_ACCESS_OPS | ATOMIC_NUM_OPS | ATOMIC_BIT_OPS;
+    case UPC_FLOAT:
+      return ATOMIC_ACCESS_OPS | ATOMIC_NUM_OPS;
+    case UPC_DOUBLE:
+      return ATOMIC_ACCESS_OPS | ATOMIC_NUM_OPS;
+    case UPC_PTS:
+      return ATOMIC_ACCESS_OPS;
+    }
+  return 0;
+}
+
+/**
+ * Convert a UPC atomic operation number into its name string.
+ *
+ * @param [in] op_num UPC atomic operation number
+ * @retval Character string
+ */
+static const char *
+__upc_atomic_op_name (upc_op_num_t op_num)
+{
+  switch (op_num)
+    {
+    case UPC_ADD_OP:
+      return "UPC_ADD";
+    case UPC_MULT_OP:
+      return "UPC_MULT";
+    case UPC_AND_OP:
+      return "UPC_AND";
+    case UPC_OR_OP:
+      return "UPC_OR";
+    case UPC_XOR_OP:
+      return "UPC_XOR";
+    case UPC_MIN_OP:
+      return "UPC_MIN";
+    case UPC_MAX_OP:
+      return "UPC_MAX";
+    case UPC_GET_OP:
+      return "UPC_GET";
+    case UPC_SET_OP:
+      return "UPC_SET";
+    case UPC_CSWAP_OP:
+      return "UPC_CSWAP";
+    case UPC_SUB_OP:
+      return "UPC_SUB";
+    case UPC_INC_OP:
+      return "UPC_INC";
+    case UPC_DEC_OP:
+      return "UPC_DEC";
+    }
+  return NULL;
+}
+
+/**
+ * Convert UPC atomic type into a string.
+ *
+ * @param [in] upc_type UPC atomic type
+ * @retval Character string
+ */
+static const char *
+__upc_atomic_type_name (upc_type_t upc_type)
+{
+  switch (upc_type)
+    {
+    case UPC_INT:
+      return "UPC_INT";
+    case UPC_UINT:
+      return "UPC_UINT";
+    case UPC_LONG:
+      return "UPC_LONG";
+    case UPC_ULONG:
+      return "UPC_ULONG";
+    case UPC_LLONG:
+      return "UPC_LLONG";
+    case UPC_ULLONG:
+      return "UPC_ULLONG";
+    case UPC_INT32:
+      return "UPC_INT32";
+    case UPC_UINT32:
+      return "UPC_UINT32";
+    case UPC_INT64:
+      return "UPC_INT64";
+    case UPC_UINT64:
+      return "UPC_UINT64";
+    case UPC_FLOAT:
+      return "UPC_FLOAT";
+    case UPC_DOUBLE:
+      return "UPC_DOUBLE";
+    case UPC_PTS:
+      return "UPC_PTS";
+    }
+  return NULL;
+}
+
+#define REQ_FETCH_PTR 0b00000001
+#define REQ_OPERAND1  0b00000010
+#define REQ_OPERAND2  0b00000100
+#define NULL_OPERAND1 0b00001000
+#define NULL_OPERAND2 0b00010000
+
+static const unsigned int operand_check[] =
+  {
+    /* UPC_ADD_OP */ REQ_OPERAND1 | NULL_OPERAND2,
+    /* UPC_MULT_OP */ REQ_OPERAND1 | NULL_OPERAND2,
+    /* UPC_AND_OP */ REQ_OPERAND1 | NULL_OPERAND2,
+    /* UPC_OR_OP */ REQ_OPERAND1 | NULL_OPERAND2,
+    /* UPC_XOR_OP */ REQ_OPERAND1 | NULL_OPERAND2,
+    /* UPC_LOGAND_OP */ 0,
+    /* UPC_LOGOR_OP */ 0,
+    /* UPC_MIN_OP */ REQ_OPERAND1 | NULL_OPERAND2,
+    /* UPC_MAX_OP */ REQ_OPERAND1 | NULL_OPERAND2,
+    /* UPC_GET_OP */ REQ_FETCH_PTR | NULL_OPERAND1 | NULL_OPERAND2,
+    /* UPC_SET_OP */ REQ_OPERAND1 | NULL_OPERAND2,
+    /* UPC_CSWAP_OP */ REQ_OPERAND1 | REQ_OPERAND2,
+    /* UPC_SUB_OP */ REQ_OPERAND1 | NULL_OPERAND2,
+    /* UPC_INC_OP */ NULL_OPERAND1 | NULL_OPERAND2,
+    /* UPC_DEC_OP */ NULL_OPERAND1 | NULL_OPERAND2,
+  };
+
+static inline void
+__upc_atomic_check_operands (upc_op_num_t op_num,
+		   void * restrict fetch_ptr,
+		   const void * restrict operand1,
+		   const void * restrict operand2)
+{
+  const unsigned int check = operand_check[op_num];
+  if ((check & REQ_FETCH_PTR) && fetch_ptr == NULL)
+    __upc_fatal ("atomic operation `%s' "
+                 "requires a non-NULL fetch pointer",
+		 __upc_atomic_op_name (op_num));
+  if ((check & REQ_OPERAND1) && operand1 == NULL)
+    __upc_fatal ("atomic operation `%s' "
+                 "requires a non-NULL operand1 pointer",
+		 __upc_atomic_op_name (op_num));
+  if ((check & REQ_OPERAND2) && operand2 == NULL)
+    __upc_fatal ("atomic operation `%s' "
+                 "requires a non-NULL operand2 pointer",
+		 __upc_atomic_op_name (op_num));
+  if ((check & NULL_OPERAND1) && operand1 != NULL)
+    __upc_fatal ("atomic operation `%s' "
+                 "requires a NULL operand1 pointer",
+		 __upc_atomic_op_name (op_num));
+  if ((check & NULL_OPERAND2) && operand2 != NULL)
+    __upc_fatal ("atomic operation `%s' "
+                 "requires a NULL operand2 pointer",
+		 __upc_atomic_op_name (op_num));
+}
+
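
[NOTE (reviewer commentary, not part of the patch): each of the type-specific
__upc_atomic_* routines that follow uses the GCC __atomic builtins directly
where a matching operation exists (fetch-add/sub/and/or/xor, exchange, and
compare-exchange), and falls back to a load / compare-exchange retry loop for
the rest (MULT, MIN, MAX, and all arithmetic on the floating-point types).
Every path leaves the value observed before the update in orig_value, so the
optional fetch pointer is serviced uniformly at the end of each function.  A
stand-alone illustration of the retry loop, computing an atomic MAX on a plain
int, is:

    /* Stand-alone sketch using GCC/Clang __atomic builtins; illustrative
       only.  */
    #include <stdio.h>

    static int
    atomic_fetch_max (int *target, int operand)
    {
      int orig, desired;
      do
        {
          /* Read the current value, compute the candidate result, and
             retry if another thread changed *target in the meantime.  */
          __atomic_load (target, &orig, __ATOMIC_SEQ_CST);
          desired = (operand > orig) ? operand : orig;
        }
      while (!__atomic_compare_exchange (target, &orig, &desired,
                                         /* weak */ 0,
                                         __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
      return orig;           /* previous value, like __atomic_fetch_add */
    }

    int
    main (void)
    {
      int x = 5;
      int prev = atomic_fetch_max (&x, 9);
      printf ("prev=%d new=%d\n", prev, x);   /* prints: prev=5 new=9 */
      return 0;
    }
]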
+static void
+__upc_atomic_I (
+	I_type * restrict fetch_ptr,
+	upc_op_num_t op_num,
+	shared I_type * restrict target,
+	I_type * restrict operand1 __attribute__((unused)),
+	I_type * restrict operand2 __attribute__((unused)))
+{
+  I_type orig_value __attribute__((unused));
+  I_type new_value __attribute__((unused));
+  
+  I_type *target_ptr = __cvtaddr (*(upc_shared_ptr_t *)&target);
+  switch (op_num)
+    {
+      case UPC_ADD_OP:
+	orig_value = __atomic_fetch_add (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_MULT_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = orig_value * *operand1;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_AND_OP:
+	orig_value = __atomic_fetch_and (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_OR_OP:
+	orig_value = __atomic_fetch_or (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_XOR_OP:
+	orig_value = __atomic_fetch_xor (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_MIN_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = (*operand1 < orig_value) ? *operand1 : orig_value;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_MAX_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = (*operand1 > orig_value) ? *operand1 : orig_value;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_GET_OP:
+        __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+        break;
+      case UPC_SET_OP:
+	if (fetch_ptr == NULL)
+	  __atomic_store (target_ptr, operand1, __ATOMIC_SEQ_CST);
+	else
+	  __atomic_exchange (target_ptr, operand1, &orig_value,
+			     /* memmodel */ __ATOMIC_SEQ_CST);
+        break;
+      case UPC_CSWAP_OP:
+	orig_value = *operand1;
+	/* __atomic_compare_exchange will return the previous value
+	   in &orig_value independent of whether operand2 is written
+	   to the target location.  */
+	(void) __atomic_compare_exchange (target_ptr,
+			    &orig_value, operand2,
+			    /* weak */ 0,
+			    /* success_memmodel */ __ATOMIC_SEQ_CST,
+			    /* failure_memmodel */ __ATOMIC_SEQ_CST);
+        break;
+      case UPC_SUB_OP:
+	orig_value = __atomic_fetch_sub (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_INC_OP:
+	orig_value = __atomic_fetch_add (target_ptr, (int) 1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_DEC_OP:
+	orig_value = __atomic_fetch_sub (target_ptr, (int) 1,
+				__ATOMIC_SEQ_CST);
+        break;
+      default: break;
+    }
+  if (fetch_ptr != NULL)
+    *fetch_ptr = orig_value;
+}
+
+static void
+__upc_atomic_UI (
+	UI_type * restrict fetch_ptr,
+	upc_op_num_t op_num,
+	shared UI_type * restrict target,
+	UI_type * restrict operand1 __attribute__((unused)),
+	UI_type * restrict operand2 __attribute__((unused)))
+{
+  UI_type orig_value __attribute__((unused));
+  UI_type new_value __attribute__((unused));
+  
+  UI_type *target_ptr = __cvtaddr (*(upc_shared_ptr_t *)&target);
+  switch (op_num)
+    {
+      case UPC_ADD_OP:
+	orig_value = __atomic_fetch_add (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_MULT_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = orig_value * *operand1;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_AND_OP:
+	orig_value = __atomic_fetch_and (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_OR_OP:
+	orig_value = __atomic_fetch_or (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_XOR_OP:
+	orig_value = __atomic_fetch_xor (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_MIN_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = (*operand1 < orig_value) ? *operand1 : orig_value;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_MAX_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = (*operand1 > orig_value) ? *operand1 : orig_value;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_GET_OP:
+        __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+        break;
+      case UPC_SET_OP:
+	if (fetch_ptr == NULL)
+	  __atomic_store (target_ptr, operand1, __ATOMIC_SEQ_CST);
+	else
+	  __atomic_exchange (target_ptr, operand1, &orig_value,
+			     /* memmodel */ __ATOMIC_SEQ_CST);
+        break;
+      case UPC_CSWAP_OP:
+	orig_value = *operand1;
+	/* __atomic_compare_exchange will return the previous value
+	   in &orig_value independent of whether operand2 is written
+	   to the target location.  */
+	(void) __atomic_compare_exchange (target_ptr,
+			    &orig_value, operand2,
+			    /* weak */ 0,
+			    /* success_memmodel */ __ATOMIC_SEQ_CST,
+			    /* failure_memmodel */ __ATOMIC_SEQ_CST);
+        break;
+      case UPC_SUB_OP:
+	orig_value = __atomic_fetch_sub (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_INC_OP:
+	orig_value = __atomic_fetch_add (target_ptr, (unsigned int) 1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_DEC_OP:
+	orig_value = __atomic_fetch_sub (target_ptr, (unsigned int) 1,
+				__ATOMIC_SEQ_CST);
+        break;
+      default: break;
+    }
+  if (fetch_ptr != NULL)
+    *fetch_ptr = orig_value;
+}
+
+static void
+__upc_atomic_L (
+	L_type * restrict fetch_ptr,
+	upc_op_num_t op_num,
+	shared L_type * restrict target,
+	L_type * restrict operand1 __attribute__((unused)),
+	L_type * restrict operand2 __attribute__((unused)))
+{
+  L_type orig_value __attribute__((unused));
+  L_type new_value __attribute__((unused));
+  
+  L_type *target_ptr = __cvtaddr (*(upc_shared_ptr_t *)&target);
+  switch (op_num)
+    {
+      case UPC_ADD_OP:
+	orig_value = __atomic_fetch_add (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_MULT_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = orig_value * *operand1;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_AND_OP:
+	orig_value = __atomic_fetch_and (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_OR_OP:
+	orig_value = __atomic_fetch_or (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_XOR_OP:
+	orig_value = __atomic_fetch_xor (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_MIN_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = (*operand1 < orig_value) ? *operand1 : orig_value;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_MAX_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = (*operand1 > orig_value) ? *operand1 : orig_value;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_GET_OP:
+        __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+        break;
+      case UPC_SET_OP:
+	if (fetch_ptr == NULL)
+	  __atomic_store (target_ptr, operand1, __ATOMIC_SEQ_CST);
+	else
+	  __atomic_exchange (target_ptr, operand1, &orig_value,
+			     /* memmodel */ __ATOMIC_SEQ_CST);
+        break;
+      case UPC_CSWAP_OP:
+	orig_value = *operand1;
+	/* __atomic_compare_exchange will return the previous value
+	   in &orig_value independent of whether operand2 is written
+	   to the target location.  */
+	(void) __atomic_compare_exchange (target_ptr,
+			    &orig_value, operand2,
+			    /* weak */ 0,
+			    /* success_memmodel */ __ATOMIC_SEQ_CST,
+			    /* failure_memmodel */ __ATOMIC_SEQ_CST);
+        break;
+      case UPC_SUB_OP:
+	orig_value = __atomic_fetch_sub (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_INC_OP:
+	orig_value = __atomic_fetch_add (target_ptr, (long) 1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_DEC_OP:
+	orig_value = __atomic_fetch_sub (target_ptr, (long) 1,
+				__ATOMIC_SEQ_CST);
+        break;
+      default: break;
+    }
+  if (fetch_ptr != NULL)
+    *fetch_ptr = orig_value;
+}
+
+static void
+__upc_atomic_UL (
+	UL_type * restrict fetch_ptr,
+	upc_op_num_t op_num,
+	shared UL_type * restrict target,
+	UL_type * restrict operand1 __attribute__((unused)),
+	UL_type * restrict operand2 __attribute__((unused)))
+{
+  UL_type orig_value __attribute__((unused));
+  UL_type new_value __attribute__((unused));
+  
+  UL_type *target_ptr = __cvtaddr (*(upc_shared_ptr_t *)&target);
+  switch (op_num)
+    {
+      case UPC_ADD_OP:
+	orig_value = __atomic_fetch_add (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_MULT_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = orig_value * *operand1;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_AND_OP:
+	orig_value = __atomic_fetch_and (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_OR_OP:
+	orig_value = __atomic_fetch_or (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_XOR_OP:
+	orig_value = __atomic_fetch_xor (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_MIN_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = (*operand1 < orig_value) ? *operand1 : orig_value;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_MAX_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = (*operand1 > orig_value) ? *operand1 : orig_value;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_GET_OP:
+        __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+        break;
+      case UPC_SET_OP:
+	if (fetch_ptr == NULL)
+	  __atomic_store (target_ptr, operand1, __ATOMIC_SEQ_CST);
+	else
+	  __atomic_exchange (target_ptr, operand1, &orig_value,
+			     /* memmodel */ __ATOMIC_SEQ_CST);
+        break;
+      case UPC_CSWAP_OP:
+	orig_value = *operand1;
+	/* __atomic_compare_exchange will return the previous value
+	   in &orig_value independent of whether operand2 is written
+	   to the target location.  */
+	(void) __atomic_compare_exchange (target_ptr,
+			    &orig_value, operand2,
+			    /* weak */ 0,
+			    /* success_memmodel */ __ATOMIC_SEQ_CST,
+			    /* failure_memmodel */ __ATOMIC_SEQ_CST);
+        break;
+      case UPC_SUB_OP:
+	orig_value = __atomic_fetch_sub (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_INC_OP:
+	orig_value = __atomic_fetch_add (target_ptr, (unsigned long) 1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_DEC_OP:
+	orig_value = __atomic_fetch_sub (target_ptr, (unsigned long) 1,
+				__ATOMIC_SEQ_CST);
+        break;
+      default: break;
+    }
+  if (fetch_ptr != NULL)
+    *fetch_ptr = orig_value;
+}
+
+static void
+__upc_atomic_LL (
+	LL_type * restrict fetch_ptr,
+	upc_op_num_t op_num,
+	shared LL_type * restrict target,
+	LL_type * restrict operand1 __attribute__((unused)),
+	LL_type * restrict operand2 __attribute__((unused)))
+{
+  LL_type orig_value __attribute__((unused));
+  LL_type new_value __attribute__((unused));
+  
+  LL_type *target_ptr = __cvtaddr (*(upc_shared_ptr_t *)&target);
+  switch (op_num)
+    {
+      case UPC_ADD_OP:
+	orig_value = __atomic_fetch_add (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_MULT_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = orig_value * *operand1;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_AND_OP:
+	orig_value = __atomic_fetch_and (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_OR_OP:
+	orig_value = __atomic_fetch_or (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_XOR_OP:
+	orig_value = __atomic_fetch_xor (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_MIN_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = (*operand1 < orig_value) ? *operand1 : orig_value;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_MAX_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = (*operand1 > orig_value) ? *operand1 : orig_value;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_GET_OP:
+        __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+        break;
+      case UPC_SET_OP:
+	if (fetch_ptr == NULL)
+	  __atomic_store (target_ptr, operand1, __ATOMIC_SEQ_CST);
+	else
+	  __atomic_exchange (target_ptr, operand1, &orig_value,
+			     /* memmodel */ __ATOMIC_SEQ_CST);
+        break;
+      case UPC_CSWAP_OP:
+	orig_value = *operand1;
+	/* __atomic_compare_exchange will return the previous value
+	   in &orig_value independent of whether operand2 is written
+	   to the target location.  */
+	(void) __atomic_compare_exchange (target_ptr,
+			    &orig_value, operand2,
+			    /* weak */ 0,
+			    /* success_memmodel */ __ATOMIC_SEQ_CST,
+			    /* failure_memmodel */ __ATOMIC_SEQ_CST);
+        break;
+      case UPC_SUB_OP:
+	orig_value = __atomic_fetch_sub (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_INC_OP:
+	orig_value = __atomic_fetch_add (target_ptr, (long long) 1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_DEC_OP:
+	orig_value = __atomic_fetch_sub (target_ptr, (long long) 1,
+				__ATOMIC_SEQ_CST);
+        break;
+      default: break;
+    }
+  if (fetch_ptr != NULL)
+    *fetch_ptr = orig_value;
+}
+
+static void
+__upc_atomic_ULL (
+	ULL_type * restrict fetch_ptr,
+	upc_op_num_t op_num,
+	shared ULL_type * restrict target,
+	ULL_type * restrict operand1 __attribute__((unused)),
+	ULL_type * restrict operand2 __attribute__((unused)))
+{
+  ULL_type orig_value __attribute__((unused));
+  ULL_type new_value __attribute__((unused));
+  
+  ULL_type *target_ptr = __cvtaddr (*(upc_shared_ptr_t *)&target);
+  switch (op_num)
+    {
+      case UPC_ADD_OP:
+	orig_value = __atomic_fetch_add (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_MULT_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = orig_value * *operand1;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_AND_OP:
+	orig_value = __atomic_fetch_and (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_OR_OP:
+	orig_value = __atomic_fetch_or (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_XOR_OP:
+	orig_value = __atomic_fetch_xor (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_MIN_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = (*operand1 < orig_value) ? *operand1 : orig_value;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_MAX_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = (*operand1 > orig_value) ? *operand1 : orig_value;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_GET_OP:
+        __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+        break;
+      case UPC_SET_OP:
+	if (fetch_ptr == NULL)
+	  __atomic_store (target_ptr, operand1, __ATOMIC_SEQ_CST);
+	else
+	  __atomic_exchange (target_ptr, operand1, &orig_value,
+			     /* memmodel */ __ATOMIC_SEQ_CST);
+        break;
+      case UPC_CSWAP_OP:
+	orig_value = *operand1;
+	/* __atomic_compare_exchange will return the previous value
+	   in &orig_value independent of whether operand2 is written
+	   to the target location.  */
+	(void) __atomic_compare_exchange (target_ptr,
+			    &orig_value, operand2,
+			    /* weak */ 0,
+			    /* success_memmodel */ __ATOMIC_SEQ_CST,
+			    /* failure_memmodel */ __ATOMIC_SEQ_CST);
+        break;
+      case UPC_SUB_OP:
+	orig_value = __atomic_fetch_sub (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_INC_OP:
+	orig_value = __atomic_fetch_add (target_ptr, (unsigned long long) 1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_DEC_OP:
+	orig_value = __atomic_fetch_sub (target_ptr, (unsigned long long) 1,
+				__ATOMIC_SEQ_CST);
+        break;
+      default: break;
+    }
+  if (fetch_ptr != NULL)
+    *fetch_ptr = orig_value;
+}
+
+static void
+__upc_atomic_I32 (
+	I32_type * restrict fetch_ptr,
+	upc_op_num_t op_num,
+	shared I32_type * restrict target,
+	I32_type * restrict operand1 __attribute__((unused)),
+	I32_type * restrict operand2 __attribute__((unused)))
+{
+  I32_type orig_value __attribute__((unused));
+  I32_type new_value __attribute__((unused));
+  
+  I32_type *target_ptr = __cvtaddr (*(upc_shared_ptr_t *)&target);
+  switch (op_num)
+    {
+      case UPC_ADD_OP:
+	orig_value = __atomic_fetch_add (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_MULT_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = orig_value * *operand1;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_AND_OP:
+	orig_value = __atomic_fetch_and (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_OR_OP:
+	orig_value = __atomic_fetch_or (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_XOR_OP:
+	orig_value = __atomic_fetch_xor (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_MIN_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = (*operand1 < orig_value) ? *operand1 : orig_value;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_MAX_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = (*operand1 > orig_value) ? *operand1 : orig_value;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_GET_OP:
+        __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+        break;
+      case UPC_SET_OP:
+	if (fetch_ptr == NULL)
+	  __atomic_store (target_ptr, operand1, __ATOMIC_SEQ_CST);
+	else
+	  __atomic_exchange (target_ptr, operand1, &orig_value,
+			     /* memmodel */ __ATOMIC_SEQ_CST);
+        break;
+      case UPC_CSWAP_OP:
+	orig_value = *operand1;
+	/* __atomic_compare_exchange will return the previous value
+	   in &orig_value independent of whether operand2 is written
+	   to the target location.  */
+	(void) __atomic_compare_exchange (target_ptr,
+			    &orig_value, operand2,
+			    /* weak */ 0,
+			    /* success_memmodel */ __ATOMIC_SEQ_CST,
+			    /* failure_memmodel */ __ATOMIC_SEQ_CST);
+        break;
+      case UPC_SUB_OP:
+	orig_value = __atomic_fetch_sub (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_INC_OP:
+	orig_value = __atomic_fetch_add (target_ptr, (int32_t) 1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_DEC_OP:
+	orig_value = __atomic_fetch_sub (target_ptr, (int32_t) 1,
+				__ATOMIC_SEQ_CST);
+        break;
+      default: break;
+    }
+  if (fetch_ptr != NULL)
+    *fetch_ptr = orig_value;
+}
+
+static void
+__upc_atomic_UI32 (
+	UI32_type * restrict fetch_ptr,
+	upc_op_num_t op_num,
+	shared UI32_type * restrict target,
+	UI32_type * restrict operand1 __attribute__((unused)),
+	UI32_type * restrict operand2 __attribute__((unused)))
+{
+  UI32_type orig_value __attribute__((unused));
+  UI32_type new_value __attribute__((unused));
+  
+  UI32_type *target_ptr = __cvtaddr (*(upc_shared_ptr_t *)&target);
+  switch (op_num)
+    {
+      case UPC_ADD_OP:
+	orig_value = __atomic_fetch_add (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_MULT_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = orig_value * *operand1;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_AND_OP:
+	orig_value = __atomic_fetch_and (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_OR_OP:
+	orig_value = __atomic_fetch_or (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_XOR_OP:
+	orig_value = __atomic_fetch_xor (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_MIN_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = (*operand1 < orig_value) ? *operand1 : orig_value;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_MAX_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = (*operand1 > orig_value) ? *operand1 : orig_value;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_GET_OP:
+        __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+        break;
+      case UPC_SET_OP:
+	if (fetch_ptr == NULL)
+	  __atomic_store (target_ptr, operand1, __ATOMIC_SEQ_CST);
+	else
+	  __atomic_exchange (target_ptr, operand1, &orig_value,
+			     /* memmodel */ __ATOMIC_SEQ_CST);
+        break;
+      case UPC_CSWAP_OP:
+	orig_value = *operand1;
+	/* __atomic_compare_exchange will return the previous value
+	   in &orig_value independent of whether operand2 is written
+	   to the target location.  */
+	(void) __atomic_compare_exchange (target_ptr,
+			    &orig_value, operand2,
+			    /* weak */ 0,
+			    /* success_memmodel */ __ATOMIC_SEQ_CST,
+			    /* failure_memmodel */ __ATOMIC_SEQ_CST);
+        break;
+      case UPC_SUB_OP:
+	orig_value = __atomic_fetch_sub (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_INC_OP:
+	orig_value = __atomic_fetch_add (target_ptr, (uint32_t) 1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_DEC_OP:
+	orig_value = __atomic_fetch_sub (target_ptr, (uint32_t) 1,
+				__ATOMIC_SEQ_CST);
+        break;
+      default: break;
+    }
+  if (fetch_ptr != NULL)
+    *fetch_ptr = orig_value;
+}
+
+static void
+__upc_atomic_I64 (
+	I64_type * restrict fetch_ptr,
+	upc_op_num_t op_num,
+	shared I64_type * restrict target,
+	I64_type * restrict operand1 __attribute__((unused)),
+	I64_type * restrict operand2 __attribute__((unused)))
+{
+  I64_type orig_value __attribute__((unused));
+  I64_type new_value __attribute__((unused));
+  
+  I64_type *target_ptr = __cvtaddr (*(upc_shared_ptr_t *)&target);
+  switch (op_num)
+    {
+      case UPC_ADD_OP:
+	orig_value = __atomic_fetch_add (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_MULT_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = orig_value * *operand1;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_AND_OP:
+	orig_value = __atomic_fetch_and (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_OR_OP:
+	orig_value = __atomic_fetch_or (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_XOR_OP:
+	orig_value = __atomic_fetch_xor (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_MIN_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = (*operand1 < orig_value) ? *operand1 : orig_value;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_MAX_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = (*operand1 > orig_value) ? *operand1 : orig_value;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_GET_OP:
+        __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+        break;
+      case UPC_SET_OP:
+	if (fetch_ptr == NULL)
+	  __atomic_store (target_ptr, operand1, __ATOMIC_SEQ_CST);
+	else
+	  __atomic_exchange (target_ptr, operand1, &orig_value,
+			     /* memmodel */ __ATOMIC_SEQ_CST);
+        break;
+      case UPC_CSWAP_OP:
+	orig_value = *operand1;
+	/* __atomic_compare_exchange will return the previous value
+	   in &orig_value independent of whether operand2 is written
+	   to the target location.  */
+	(void) __atomic_compare_exchange (target_ptr,
+			    &orig_value, operand2,
+			    /* weak */ 0,
+			    /* success_memmodel */ __ATOMIC_SEQ_CST,
+			    /* failure_memmodel */ __ATOMIC_SEQ_CST);
+        break;
+      case UPC_SUB_OP:
+	orig_value = __atomic_fetch_sub (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_INC_OP:
+	orig_value = __atomic_fetch_add (target_ptr, (int64_t) 1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_DEC_OP:
+	orig_value = __atomic_fetch_sub (target_ptr, (int64_t) 1,
+				__ATOMIC_SEQ_CST);
+        break;
+      default: break;
+    }
+  if (fetch_ptr != NULL)
+    *fetch_ptr = orig_value;
+}
+
+static void
+__upc_atomic_UI64 (
+	UI64_type * restrict fetch_ptr,
+	upc_op_num_t op_num,
+	shared UI64_type * restrict target,
+	UI64_type * restrict operand1 __attribute__((unused)),
+	UI64_type * restrict operand2 __attribute__((unused)))
+{
+  UI64_type orig_value __attribute__((unused));
+  UI64_type new_value __attribute__((unused));
+  
+  UI64_type *target_ptr = __cvtaddr (*(upc_shared_ptr_t *)&target);
+  switch (op_num)
+    {
+      case UPC_ADD_OP:
+	orig_value = __atomic_fetch_add (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_MULT_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = orig_value * *operand1;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_AND_OP:
+	orig_value = __atomic_fetch_and (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_OR_OP:
+	orig_value = __atomic_fetch_or (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_XOR_OP:
+	orig_value = __atomic_fetch_xor (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_MIN_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = (*operand1 < orig_value) ? *operand1 : orig_value;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_MAX_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = (*operand1 > orig_value) ? *operand1 : orig_value;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_GET_OP:
+        __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+        break;
+      case UPC_SET_OP:
+	if (fetch_ptr == NULL)
+	  __atomic_store (target_ptr, operand1, __ATOMIC_SEQ_CST);
+	else
+	  __atomic_exchange (target_ptr, operand1, &orig_value,
+			     /* memmodel */ __ATOMIC_SEQ_CST);
+        break;
+      case UPC_CSWAP_OP:
+	orig_value = *operand1;
+	/* __atomic_compare_exchange will return the previous value
+	   in &orig_value independent of whether operand2 is written
+	   to the target location.  */
+	(void) __atomic_compare_exchange (target_ptr,
+			    &orig_value, operand2,
+			    /* weak */ 0,
+			    /* success_memmodel */ __ATOMIC_SEQ_CST,
+			    /* failure_memmodel */ __ATOMIC_SEQ_CST);
+        break;
+      case UPC_SUB_OP:
+	orig_value = __atomic_fetch_sub (target_ptr, *operand1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_INC_OP:
+	orig_value = __atomic_fetch_add (target_ptr, (uint64_t) 1,
+				__ATOMIC_SEQ_CST);
+        break;
+      case UPC_DEC_OP:
+	orig_value = __atomic_fetch_sub (target_ptr, (uint64_t) 1,
+				__ATOMIC_SEQ_CST);
+        break;
+      default: break;
+    }
+  if (fetch_ptr != NULL)
+    *fetch_ptr = orig_value;
+}
+
+static void
+__upc_atomic_F (
+	F_type * restrict fetch_ptr,
+	upc_op_num_t op_num,
+	shared F_type * restrict target,
+	F_type * restrict operand1 __attribute__((unused)),
+	F_type * restrict operand2 __attribute__((unused)))
+{
+  F_type orig_value __attribute__((unused));
+  F_type new_value __attribute__((unused));
+  
+  F_type *target_ptr = __cvtaddr (*(upc_shared_ptr_t *)&target);
+  switch (op_num)
+    {
+      case UPC_ADD_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = orig_value + *operand1;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_MULT_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = orig_value * *operand1;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_MIN_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = (*operand1 < orig_value) ? *operand1 : orig_value;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_MAX_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = (*operand1 > orig_value) ? *operand1 : orig_value;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_GET_OP:
+        __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+        break;
+      case UPC_SET_OP:
+	if (fetch_ptr == NULL)
+	  __atomic_store (target_ptr, operand1, __ATOMIC_SEQ_CST);
+	else
+	  __atomic_exchange (target_ptr, operand1, &orig_value,
+			     /* memmodel */ __ATOMIC_SEQ_CST);
+        break;
+      case UPC_CSWAP_OP:
+	orig_value = *operand1;
+	/* __atomic_compare_exchange will return the previous value
+	   in &orig_value independent of whether operand2 is written
+	   to the target location.  */
+	(void) __atomic_compare_exchange (target_ptr,
+			    &orig_value, operand2,
+			    /* weak */ 0,
+			    /* success_memmodel */ __ATOMIC_SEQ_CST,
+			    /* failure_memmodel */ __ATOMIC_SEQ_CST);
+        break;
+      case UPC_SUB_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = orig_value - *operand1;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_INC_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = orig_value + (float) 1;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_DEC_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = orig_value - (float) 1;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      default: break;
+    }
+  if (fetch_ptr != NULL)
+    *fetch_ptr = orig_value;
+}
+
+static void
+__upc_atomic_D (
+	D_type * restrict fetch_ptr,
+	upc_op_num_t op_num,
+	shared D_type * restrict target,
+	D_type * restrict operand1 __attribute__((unused)),
+	D_type * restrict operand2 __attribute__((unused)))
+{
+  D_type orig_value __attribute__((unused));
+  D_type new_value __attribute__((unused));
+  
+  D_type *target_ptr = __cvtaddr (*(upc_shared_ptr_t *)&target);
+  switch (op_num)
+    {
+      case UPC_ADD_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = orig_value + *operand1;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_MULT_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = orig_value * *operand1;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_MIN_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = (*operand1 < orig_value) ? *operand1 : orig_value;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_MAX_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = (*operand1 > orig_value) ? *operand1 : orig_value;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_GET_OP:
+        __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+        break;
+      case UPC_SET_OP:
+	if (fetch_ptr == NULL)
+	  __atomic_store (target_ptr, operand1, __ATOMIC_SEQ_CST);
+	else
+	  __atomic_exchange (target_ptr, operand1, &orig_value,
+			     /* memmodel */ __ATOMIC_SEQ_CST);
+        break;
+      case UPC_CSWAP_OP:
+	orig_value = *operand1;
+	/* __atomic_compare_exchange will return the previous value
+	   in &orig_value independent of whether operand2 is written
+	   to the target location.  */
+	(void) __atomic_compare_exchange (target_ptr,
+			    &orig_value, operand2,
+			    /* weak */ 0,
+			    /* success_memmodel */ __ATOMIC_SEQ_CST,
+			    /* failure_memmodel */ __ATOMIC_SEQ_CST);
+        break;
+      case UPC_SUB_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = orig_value - *operand1;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_INC_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = orig_value + (double) 1;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      case UPC_DEC_OP:
+	do
+	  {
+            __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+	    new_value = orig_value - (double) 1;
+	  }
+	while (!__atomic_compare_exchange (target_ptr, &orig_value, &new_value,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST));
+        break;
+      default: break;
+    }
+  if (fetch_ptr != NULL)
+    *fetch_ptr = orig_value;
+}
+
+static void
+__upc_atomic_PTS (
+	PTS_type * restrict fetch_ptr,
+	upc_op_num_t op_num,
+	shared PTS_type * restrict target,
+	PTS_type * restrict operand1 __attribute__((unused)),
+	PTS_type * restrict operand2 __attribute__((unused)))
+{
+  PTS_type orig_value __attribute__((unused));
+  PTS_type new_value __attribute__((unused));
+  
+  int op_ok __attribute__((unused));
+  PTS_type *target_ptr = __cvtaddr (*(upc_shared_ptr_t *)&target);
+  switch (op_num)
+    {
+      case UPC_GET_OP:
+        __atomic_load (target_ptr, &orig_value, __ATOMIC_SEQ_CST);
+        break;
+      case UPC_SET_OP:
+	if (fetch_ptr == NULL)
+	  __atomic_store (target_ptr, operand1, __ATOMIC_SEQ_CST);
+	else
+	  __atomic_exchange (target_ptr, operand1, &orig_value,
+			     /* memmodel */ __ATOMIC_SEQ_CST);
+        break;
+      case UPC_CSWAP_OP:
+	orig_value = *operand1;
+	/* __atomic_compare_exchange will return the previous value
+	   in &orig_value independent of whether operand2 is written
+	   to the target location.  */
+	op_ok = __atomic_compare_exchange (target_ptr, &orig_value, operand2,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST);
+	/* If the previous compare exchange operation failed, check
+	   for UPC PTS equality (which ignores phase).  If the pointers
+	   compare as equal, try again.  */
+	if (!op_ok && (orig_value == *operand1))
+	  {
+            (void) __atomic_compare_exchange (target_ptr,
+	                        &orig_value, operand2,
+				/* weak */ 0,
+				/* success_memmodel */ __ATOMIC_SEQ_CST,
+				/* failure_memmodel */ __ATOMIC_SEQ_CST);
+	  }
+        break;
+      default: break;
+    }
+  if (fetch_ptr != NULL)
+    *fetch_ptr = orig_value;
+}
+
+/**
+ * UPC atomic relaxed operation.
+ *
+ * @param [in] domain Atomic domain
+ * @param [out] fetch_ptr Location where the fetched (previous) value
+ *                        is stored; may be NULL
+ * @param [in] op Atomic operation
+ * @param [in] target Target address of the operation
+ * @param [in] operand1 First operand, if required by the operation
+ * @param [in] operand2 Second operand, if required by the operation
+ *
+ * @ingroup UPCATOMIC UPC Atomic Functions
+ */
+void
+upc_atomic_relaxed (upc_atomicdomain_t *domain,
+		   void * restrict fetch_ptr,
+		   upc_op_t op,
+		   shared void * restrict target,
+		   const void * restrict operand1,
+		   const void * restrict operand2)
+{
+  struct upc_atomicdomain_struct *ldomain =
+    (struct upc_atomicdomain_struct *) &domain[MYTHREAD];
+  upc_op_num_t op_num;
+  if (op & ~(-op))
+    __upc_fatal ("atomic operation (0x%llx) may have only "
+                 "a single bit set", (long long)op);
+  if (!__upc_atomic_is_valid_op (op))
+    __upc_fatal ("invalid atomic operation (0x%llx)",
+                 (long long)op);
+  op_num = __upc_atomic_op_num (op);
+  if (op & ~ldomain->ops)
+    __upc_fatal ("invalid operation (%s) for specified domain",
+	         __upc_atomic_op_name (op_num));
+  __upc_atomic_check_operands (op_num, fetch_ptr, operand1, operand2);
+  switch (ldomain->optype)
+    {
+    case UPC_INT:
+      __upc_atomic_I (
+	       (I_type *) fetch_ptr,
+	       op_num,
+	       (shared I_type *) target,
+	       (I_type *) operand1,
+	       (I_type *) operand2);
+      break;
+    case UPC_UINT:
+      __upc_atomic_UI (
+	       (UI_type *) fetch_ptr,
+	       op_num,
+	       (shared UI_type *) target,
+	       (UI_type *) operand1,
+	       (UI_type *) operand2);
+      break;
+    case UPC_LONG:
+      __upc_atomic_L (
+	       (L_type *) fetch_ptr,
+	       op_num,
+	       (shared L_type *) target,
+	       (L_type *) operand1,
+	       (L_type *) operand2);
+      break;
+    case UPC_ULONG:
+      __upc_atomic_UL (
+	       (UL_type *) fetch_ptr,
+	       op_num,
+	       (shared UL_type *) target,
+	       (UL_type *) operand1,
+	       (UL_type *) operand2);
+      break;
+    case UPC_LLONG:
+      __upc_atomic_LL (
+	       (LL_type *) fetch_ptr,
+	       op_num,
+	       (shared LL_type *) target,
+	       (LL_type *) operand1,
+	       (LL_type *) operand2);
+      break;
+    case UPC_ULLONG:
+      __upc_atomic_ULL (
+	       (ULL_type *) fetch_ptr,
+	       op_num,
+	       (shared ULL_type *) target,
+	       (ULL_type *) operand1,
+	       (ULL_type *) operand2);
+      break;
+    case UPC_INT32:
+      __upc_atomic_I32 (
+	       (I32_type *) fetch_ptr,
+	       op_num,
+	       (shared I32_type *) target,
+	       (I32_type *) operand1,
+	       (I32_type *) operand2);
+      break;
+    case UPC_UINT32:
+      __upc_atomic_UI32 (
+	       (UI32_type *) fetch_ptr,
+	       op_num,
+	       (shared UI32_type *) target,
+	       (UI32_type *) operand1,
+	       (UI32_type *) operand2);
+      break;
+    case UPC_INT64:
+      __upc_atomic_I64 (
+	       (I64_type *) fetch_ptr,
+	       op_num,
+	       (shared I64_type *) target,
+	       (I64_type *) operand1,
+	       (I64_type *) operand2);
+      break;
+    case UPC_UINT64:
+      __upc_atomic_UI64 (
+	       (UI64_type *) fetch_ptr,
+	       op_num,
+	       (shared UI64_type *) target,
+	       (UI64_type *) operand1,
+	       (UI64_type *) operand2);
+      break;
+    case UPC_FLOAT:
+      __upc_atomic_F (
+	       (F_type *) fetch_ptr,
+	       op_num,
+	       (shared F_type *) target,
+	       (F_type *) operand1,
+	       (F_type *) operand2);
+      break;
+    case UPC_DOUBLE:
+      __upc_atomic_D (
+	       (D_type *) fetch_ptr,
+	       op_num,
+	       (shared D_type *) target,
+	       (D_type *) operand1,
+	       (D_type *) operand2);
+      break;
+    case UPC_PTS:
+      __upc_atomic_PTS (
+	       (PTS_type *) fetch_ptr,
+	       op_num,
+	       (shared PTS_type *) target,
+	       (PTS_type *) operand1,
+	       (PTS_type *) operand2);
+      break;
+    }
+}
+
+/**
+ * UPC atomic strict operation.
+ *
+ * @param [in] domain Atomic domain
+ * @param [out] fetch_ptr Location where the fetched (previous) value
+ *                        is stored; may be NULL
+ * @param [in] op Atomic operation
+ * @param [in] target Target address of the operation
+ * @param [in] operand1 First operand, if required by the operation
+ * @param [in] operand2 Second operand, if required by the operation
+ *
+ * @ingroup UPCATOMIC UPC Atomic Functions
+ */
+void
+upc_atomic_strict (upc_atomicdomain_t *domain,
+		   void * restrict fetch_ptr,
+		   upc_op_t op,
+		   shared void * restrict target,
+		   const void * restrict operand1,
+		   const void * restrict operand2)
+{
+  upc_fence;
+  upc_atomic_relaxed (domain, fetch_ptr, op, target, operand1, operand2);
+  upc_fence;
+}
+
+/**
+ * Collective allocation of atomic domain.
+ *
+ * The SMP implementation is built on the GCC atomic builtins;
+ * the hint field is ignored.
+ *
+ * @param [in] type Atomic operation type
+ * @param [in] ops Atomic domain operations
+ * @param [in] hints Atomic operation hint
+ * @return Pointer to the allocated atomic domain
+ *
+ * @ingroup UPCATOMIC UPC Atomic Functions
+ */
+upc_atomicdomain_t *
+upc_all_atomicdomain_alloc (upc_type_t type,
+			    upc_op_t ops,
+			    __attribute__((unused)) upc_atomichint_t hints)
+{
+  upc_atomicdomain_t *domain;
+  struct upc_atomicdomain_struct *ldomain;
+  upc_op_t supported_ops;
+  if (!__upc_atomic_is_valid_type (type))
+    __upc_fatal ("unsupported atomic type: 0x%llx",
+                 (long long) type);
+  supported_ops = __upc_atomic_supported_ops (type);
+  if ((ops & ~supported_ops) != 0)
+    __upc_fatal ("one/more requested atomic operations (0x%llx) unsupported "
+                 "for type `%s'", (long long) ops,
+		 __upc_atomic_type_name (type));
+  domain = (upc_atomicdomain_t *)
+    upc_all_alloc (THREADS, sizeof (struct upc_atomicdomain_struct));
+  if (domain == NULL)
+    __upc_fatal ("unable to allocate atomic domain");
+  ldomain = (struct upc_atomicdomain_struct *)&domain[MYTHREAD];
+  ldomain->ops = ops;
+  ldomain->optype = type;
+  return domain;
+}
+
+/**
+ * Collective free of the atomic domain.
+ *
+ * @param [in] domain Pointer to atomic domain
+ *
+ * @ingroup UPCATOMIC UPC Atomic Functions
+ */
+void
+upc_all_atomicdomain_free (upc_atomicdomain_t * domain)
+{
+  assert (domain != NULL);
+  upc_barrier;
+  if (MYTHREAD == 0)
+    {
+      upc_free (domain);
+    }
+  upc_barrier;
+}
+
+/**
+ * Query implementation for expected performance.
+ *
+ * @param [in] optype Atomic operation type
+ * @param [in] ops Atomic domain operations
+ * @param [in] addr Atomic address
+ * @return Expected performance level
+ *
+ * @ingroup UPCATOMIC UPC Atomic Functions
+ */
+int
+upc_atomic_isfast (__attribute__((unused)) upc_type_t optype,
+	 	   __attribute__((unused)) upc_op_t ops,
+		   __attribute__((unused)) shared void *addr)
+{
+  /* We could make the distinction that only operations
+     directly supported by the builtin atomics are "fast",
+     but for now ... everything in the SMP runtime is
+     defined to be fast.  */
+  return UPC_ATOMIC_PERFORMANCE_FAST;
+}
+
+/** @} */
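
For reference, a minimal sketch of how the atomics interface implemented
above is typically used (UPC 1.3 atomic domains).  The shared counter and
variable names are illustrative only, and the example assumes the
<upc_atomic.h> header installed from libgupc/include:

#include <upc.h>
#include <upc_atomic.h>

shared long counter;

int
main (void)
{
  /* Collectively create a domain that supports add and get on 'long'.  */
  upc_atomicdomain_t *dom =
    upc_all_atomicdomain_alloc (UPC_LONG, UPC_ADD | UPC_GET, 0);
  long delta = 1, fetched;
  /* Atomically add 'delta' to 'counter'; the previous value is
     returned through 'fetched'.  */
  upc_atomic_relaxed (dom, &fetched, UPC_ADD, &counter, &delta, NULL);
  upc_barrier;
  /* Read the final value with a strict (fenced) atomic get.  */
  upc_atomic_strict (dom, &fetched, UPC_GET, &counter, NULL, NULL);
  upc_all_atomicdomain_free (dom);
  return 0;
}
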
Index: libgupc/smp/upc_backtrace.c
===================================================================
--- libgupc/smp/upc_backtrace.c	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_backtrace.c	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,443 @@
+/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime Library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#include "upc_config.h"
+#include "upc_sysdep.h"
+#include "upc_defs.h"
+#include "upc_sup.h"
+#include "upc_access.h"
+#include "upc_backtrace.h"
+#include <signal.h>
+#include <string.h>
+#if HAVE_EXECINFO_H
+#include <execinfo.h>
+#endif
+#if HAVE_LIMITS_H
+#include <limits.h>
+#endif
+
+/** Skip over frames belonging to the backtrace code itself.  */
+#define GUPCR_BT_SKIP_FRAME_CNT 3
+/** Maximum number of stack frames to display.  */
+#define GUPCR_BT_DEPTH_CNT 128
+
+#ifndef PATH_MAX
+#define PATH_MAX 1024
+#endif
+
+/** Default backtrace file name prefix.  */
+#define UPC_BACKTRACE_PREFIX "backtrace"
+
+/** Full path of the executable program.  */
+static char *__upc_abs_execname;
+
+/** Backtrace on faults enabled flag.  */
+static int bt_enabled = 0;
+
+/** 
+ * GLIBC backtrace.
+ *
+ * Show backtrace by using the GLIBC backtrace functionality.
+ * The backtrace is augmented with source file/line numbers when
+ * addr2line is available.
+ *
+ * By default, backtrace lines are written to 'stderr'.  However,
+ * the environment variable UPC_BACKTRACEFILE can be set to redirect
+ * the backtrace to a file; its value is used as a prefix for the
+ * backtrace file name.  For example, if it is set to "/tmp/trace-upc",
+ * thread 3 writes its backtrace to "/tmp/trace-upc.3".
+ * If the environment variable is set but empty, the default
+ * "backtrace" prefix is used.
+ *
+ */
+void
+__upc_backtrace (void)
+{
+  void *strace[GUPCR_BT_DEPTH_CNT];
+  size_t size,i;
+  char **strace_str;
+  char *file_env;
+  int under_upc_main = 1;
+  FILE *traceout = stderr;
+  upc_info_p u = __upc_info;
+  if (!u)
+    __upc_fatal ("UPC runtime not initialized");
+
+  file_env = getenv (GUPCR_BACKTRACE_FILE_ENV);
+  if (file_env)
+    {
+      #define MAX_INT_STRING ".2147483647"
+      char *tracefile;
+      int len, lenw;
+      /* Use the default trace file name prefix if the user did not specify one.  */
+      if (!strlen (file_env))
+	file_env = (char *) UPC_BACKTRACE_PREFIX;
+      len = strlen (file_env) + strlen (MAX_INT_STRING) + 1;
+      tracefile = malloc (len);
+      if (!tracefile)
+        __upc_fatal ("cannot allocate (%d) memory for backtrace file %s",
+		     len, file_env);
+      lenw = snprintf (tracefile, len, "%s.%d", file_env, MYTHREAD);
+      if ((lenw >= len) || (lenw < 0))
+	__upc_fatal ("cannot create backtrace file name: %s", file_env);
+      traceout = fopen (tracefile, "w");
+      if (!traceout)
+	__upc_fatal ("cannot open backtrace file: %s", tracefile);
+      free (tracefile);
+    }
+  else
+    fprintf (traceout, "Thread %d backtrace:\n", MYTHREAD);
+
+  /* Use "backtrace" functionality of glibc to receive
+     backtrace addresses.  */
+  size = backtrace (strace, GUPCR_BT_DEPTH_CNT);
+  /* Add symbolic information to each address
+     and print the stack trace.  */
+  for (i = GUPCR_BT_SKIP_FRAME_CNT; i < size; i++)
+    {
+      if (under_upc_main)
+        {
+# if HAVE_UPC_BACKTRACE_ADDR2LINE
+	  /* Call addr2line to generate source files, line numbers,
+	     and functions.  In case of any error (malloc, snprintf)
+	     do not abort the program.  */
+	  FILE *a2l;
+	  #define CMD_TMPL "%s -f -e %s %p"
+	  /* Allow space for addr2line, filename, command line options,
+	     and address argument for addr2line.  */
+	  int cmd_size = strlen (GUPCR_BACKTRACE_ADDR2LINE) +
+			 strlen (__upc_abs_execname) +
+			 strlen (CMD_TMPL) +
+			 strlen ("0x1234567812345678");
+	  int sz;
+	  char *cmd = malloc (cmd_size);
+	  /* Per the note above, do not abort on a malloc failure;
+	     give up on the backtrace quietly instead.  */
+	  if (!cmd)
+	    return;
+	  /* Create the actual addr2line command.  */
+	  sz = snprintf (cmd, cmd_size, CMD_TMPL, GUPCR_BACKTRACE_ADDR2LINE,
+			 __upc_abs_execname, strace[i]);
+	  if ((sz >= cmd_size) || (sz < 0))
+	    {
+	      fprintf (traceout, "unable to create addr2line "
+				 "command line\n");
+	      return;
+	    }
+	  /* Execute addr2line.  */
+	  a2l = popen (cmd, "r");
+	  free (cmd);
+	  if (a2l)
+	    {
+	      /* addr2line responds with two lines: procedure name and
+		 the file name with line number.  */
+	      int max_rep = 2 * FILENAME_MAX;
+	      /* Build a data structure that is identical to the
+		 structure returned by the glibc backtrace_symbol().  */
+	      struct back_trace {
+		char *addr;
+	        char data[1];
+	      };
+	      struct back_trace *rep = malloc (max_rep);
+	      int index = 0;
+	      if (!rep)
+		{
+		  fprintf (traceout, "unable to acquire memory "
+				     "for backtracing\n");
+		  return;
+		}
+	      rep->data[0] = '\0';
+	      /* Read addr2line response.  */
+	      while (fgets(&rep->data[index], max_rep-index, a2l))
+		{
+		  /* Remove all the new lines, as addr2line returns
+		     info in multiple lines.  */
+		  index = strlen (&rep->data[0]);
+		  if (rep->data[index - 1] == '\n')
+		    rep->data[index - 1] = ' ';
+		}
+	      pclose (a2l);
+	      rep->addr = &rep->data[0];
+	      strace_str = &rep->addr;
+	    }
+	  else
+	    {
+	      /* Somehow we failed to invoke addr2line, fall back
+	         to glibc.  */
+	      strace_str = backtrace_symbols (&strace[i], 1);
+	    }
+# else
+	  strace_str = backtrace_symbols (&strace[i], 1);
+# endif
+	  fprintf (traceout, "[%4d][%lld] %s\n", MYTHREAD, 
+	      (long long int) (i - GUPCR_BT_SKIP_FRAME_CNT), *strace_str);
+	  /* Extra info for the barrier. */
+	  if (strstr( *strace_str, "__upc_wait"))
+	    {
+	      fprintf (traceout, "[%4d]       BARRIER ID: %d\n", MYTHREAD, 
+		       __upc_barrier_id);
+	    }
+          if (strstr (*strace_str, "upc_main"))
+	    under_upc_main = 0;
+	  /* Symbol trace buffer must be released.  */
+	  free (strace_str);
+	}
+    }
+  fflush (traceout);
+  if (file_env)
+    fclose (traceout);
+}
+
+#define GUPCR_BACKTRACE_PID_BUFLEN 16
+
+/**
+ * Backtrace on fatal errors.
+ *
+ * Print backtrace (stack frames) on fatal errors: run-time
+ * fatal error or segmentation fault. 
+ *
+ * Only print backtrace if environment variable UPC_BACKTRACE
+ * is set to 1. The following order of backtrace capabilities
+ * is searched and executed:
+ *
+ * (1) Use GDB for backtrace (if enabled)
+ * (2) Use GLIBC backtrace with source file/line display (if
+ *     addr2line is available)
+ * (3) Use GLIBC backtrace with raw addresses (display is 
+ *     improved if -rdynamic option is supported by the linker)
+ *
+ */
+void
+__upc_fatal_backtrace (void)
+{
+  if (bt_enabled)
+    {
+#ifdef HAVE_UPC_BACKTRACE_GDB
+  	{
+	  char *env;
+	  const char *gdb;
+          char pid_buf[GUPCR_BACKTRACE_PID_BUFLEN];
+          int child_pid;
+          /* Which gdb to use? */
+          env = getenv (GUPCR_BACKTRACE_GDB_ENV);
+          if (!env || (strlen (env) == 0))
+              gdb = GUPCR_BACKTRACE_GDB;
+	  else
+              gdb = (const char *) env;
+	  if (strcmp (gdb, "none"))
+ 	    {
+	      const char *err_msg = 0;
+	      char tmpf[PATH_MAX];
+	      int fbt;
+	      const char *btcmd = "backtrace 30\n";
+              fprintf (stderr, "Thread %d GDB backtrace:\n", MYTHREAD);
+	      /* Get pid and name of the running program. */
+              sprintf(pid_buf, "%ld", (long) getpid());
+	      /* Create temp file for GDB commands. */
+	      if ((fbt = __upc_create_temp_file 
+			 ("upc_bt_gdb.XXXXXX", tmpf, &err_msg)) == -1)
+	   	{
+		  fprintf (stderr, "cannot open gdb command - %s\n", err_msg);
+		  return;
+		}
+	      if (write (fbt, btcmd, strlen (btcmd)) == -1)
+ 		{
+		  perror ("cannot write gdb command file for backtrace");
+		  return;
+		}
+	      if (close (fbt))
+ 		{
+		  perror ("cannot close gdb command file for backtrace");
+		  return;
+		}
+              child_pid = fork();
+              if (!child_pid)
+		{
+		  dup2(2,1);
+		  execlp(gdb, gdb, "-nx", "-batch", "-x", tmpf, 
+		         __upc_abs_execname, pid_buf, NULL);
+		  fprintf (stderr, "cannot start GDB - %s\n", gdb);
+		  abort(); /* If gdb failed to start */
+		}
+	      else
+		waitpid(child_pid,NULL,0);
+	      unlink (tmpf);
+              return;
+	    }
+        }
+#endif /* GUPCR_BACKTRACE_GDB */
+
+       /* Simple backtrace only. */
+       __upc_backtrace ();
+    }
+}
+
+/**
+ * Print thread/process mapping OR
+ *   request a trace dump from UPC threads.
+ */
+static void
+__upc_backtrace_monitor (void)
+{
+  int i;
+  char *trace_file_name;
+  trace_file_name = getenv (GUPCR_BACKTRACE_FILE_ENV);
+  if (trace_file_name)
+    {
+      /* Dump backtraces into files.
+         Send signal to all UPC threads.  */
+      fprintf (stderr, "Thread monitor\n");
+      fprintf (stderr, "Sending requests for trace dump\n");
+      for (i = 0; i < THREADS; i++)
+	{
+	  kill (__upc_info->thread_info[i].pid, GUPCR_BACKTRACE_SIGNAL);
+	}
+    }
+  else
+    {
+      fprintf (stderr, "Thread ID to PID mappings\n");
+      fprintf (stderr, " Thread   PID\n");
+      for (i = 0; i < THREADS; i++)
+	{
+	  fprintf (stderr,
+	           "   %4d   %ld\n", i, (long) __upc_info->thread_info[i].pid);
+	}
+    }
+}
+
+/**
+ * Backtrace signal handler.
+ *
+ * Display stack frames on a request. In case of the
+ * monitor thread only print the mappings between the 
+ * UPC threads and processes.
+ */
+static void
+__upc_backtrace_handler (int sig __attribute__ ((unused)),
+			 siginfo_t *siginfo __attribute__ ((unused)),
+			 void *context __attribute__ ((unused)))
+{
+  if (MYTHREAD == -1)
+    __upc_backtrace_monitor ();
+  else
+    __upc_backtrace ();
+}
+
+/**
+ * Backtrace fault handler.
+ *
+ * A fault happened and backtrace is enabled.  Allow only
+ * one thread to print the backtrace.  Restoring the signal
+ * handlers to their defaults and returning ensures that the
+ * signal terminates the thread and allows the monitor
+ * thread to terminate all the other threads.
+ */
+static void
+__upc_fault_handler (int sig __attribute__ ((unused)),
+	  	     siginfo_t *siginfo __attribute__ ((unused)),
+		     void *context __attribute__ ((unused)))
+{
+  upc_info_p u = __upc_info;
+  if (u)
+    __upc_acquire_lock (&u->lock);
+  __upc_backtrace_restore_handlers ();
+  __upc_fatal_backtrace ();
+}
+
+/**
+ * Initialize UPC backtrace.
+ */
+void
+__upc_backtrace_init (const char *execname)
+{
+  char *env;
+  /* Find the full path of the executable.  On Linux systems we
+     might be able to read "/proc/self/exe" to get the full
+     executable path, but that is not portable.  */
+  int slen = PATH_MAX + 1 - strlen (execname) - 2;
+  __upc_abs_execname = malloc (PATH_MAX + 1);
+  if (!__upc_abs_execname)
+    __upc_fatal ("cannot allocate space for executable file name");
+  *__upc_abs_execname = '\0';
+  if (execname[0] != '/')
+    {
+      if (!getcwd (__upc_abs_execname, slen))
+        strcpy (__upc_abs_execname, "/BT_CANNOT_CREATE_ABS_PATH");
+      strcat (__upc_abs_execname, "/");
+    }
+  strcat (__upc_abs_execname, execname);
+
+#ifdef HAVE_UPC_BACKTRACE_SIGNAL
+  {
+    /* Install backtrace signal handler (backtrace on request). */
+    struct sigaction act;
+    memset (&act, '\0', sizeof(act));
+    act.sa_sigaction = &__upc_backtrace_handler;
+    act.sa_flags = SA_SIGINFO;
+    if (sigaction(GUPCR_BACKTRACE_SIGNAL, &act, NULL) < 0) {
+      perror ("was not able to install backtrace handler");
+    }
+  }
+#endif
+
+  /* Install signal handlers only if backtrace is enabled.  */
+  env = getenv (GUPCR_BACKTRACE_ENV);
+  if (env)
+    bt_enabled = atoi (env);
+  
+  if (bt_enabled)
+    {
+      struct sigaction act;
+      memset (&act, '\0', sizeof(act));
+      act.sa_sigaction = &__upc_fault_handler;
+      act.sa_flags = SA_SIGINFO;
+      if (sigaction(SIGABRT, &act, NULL) < 0)
+        perror ("unable to install SIGABRT handler");
+      if (sigaction(SIGILL, &act, NULL) < 0)
+        perror ("unable to install SIGILL handler");
+      if (sigaction(SIGSEGV, &act, NULL) < 0)
+        perror ("unable to install SIGSEGV handler");
+      if (sigaction(SIGBUS, &act, NULL) < 0)
+        perror ("unable to install SIGBUS handler");
+      if (sigaction(SIGFPE, &act, NULL) < 0)
+        perror ("unable to install SIGFPE handler");
+    }
+}
+
+/**
+ * Restore default handlers.
+ *
+ * Has to be called once the run-time discovered
+ * a fatal error.
+ */ 
+void
+__upc_backtrace_restore_handlers (void)
+{
+  /* Don't handle any signals with backtrace code. Install
+     default handlers.  */
+  signal (SIGABRT, SIG_DFL);
+  signal (SIGILL, SIG_DFL);
+  signal (SIGSEGV, SIG_DFL);
+  signal (SIGBUS, SIG_DFL);
+  signal (SIGFPE, SIG_DFL);
+}
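
The fault handling above follows a restore-and-return idiom: the handler
prints the backtrace, reinstalls the default signal dispositions, and
returns, so the re-executed faulting instruction terminates the thread.
A minimal standalone sketch of that idiom, using the glibc backtrace
interface directly (the function names here are illustrative and not part
of the runtime):

#include <execinfo.h>
#include <signal.h>
#include <string.h>

static void
fault_handler (int sig, siginfo_t *si, void *ctx)
{
  void *frames[64];
  int n;
  (void) si;
  (void) ctx;
  /* backtrace_symbols_fd writes straight to a file descriptor and is
     async-signal-safe, unlike the malloc-based backtrace_symbols.  */
  n = backtrace (frames, 64);
  backtrace_symbols_fd (frames, n, 2);
  /* Restore the default disposition and return; the fault recurs and
     the default action terminates the process.  */
  signal (sig, SIG_DFL);
}

static void
install_fault_handlers (void)
{
  struct sigaction act;
  memset (&act, 0, sizeof (act));
  act.sa_sigaction = &fault_handler;
  act.sa_flags = SA_SIGINFO;
  sigaction (SIGSEGV, &act, NULL);
  sigaction (SIGBUS, &act, NULL);
  sigaction (SIGFPE, &act, NULL);
}
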
Index: libgupc/smp/upc_backtrace.h
===================================================================
--- libgupc/smp/upc_backtrace.h	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_backtrace.h	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,45 @@
+/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+
+#ifndef _UPC_BACKTRACE_H_
+#define _UPC_BACKTRACE_H_
+
+/* Environment variables. */
+/** Enable/Disable backtrace env variable. */
+#define GUPCR_BACKTRACE_ENV "UPC_BACKTRACE"
+/** Backtrace file name prefix env variable. */
+#define GUPCR_BACKTRACE_FILE_ENV "UPC_BACKTRACEFILE"
+/** GDB command for backtrace env variable. */
+#define GUPCR_BACKTRACE_GDB_ENV "UPC_BACKTRACE_GDB"
+
+/* Interfaces. */
+extern void __upc_backtrace (void);
+extern void __upc_fatal_backtrace (void);
+extern void __upc_backtrace_init (const char *execname);
+extern void __upc_backtrace_restore_handlers (void);
+
+#endif /* !_UPC_BACKTRACE_H_ */
Index: libgupc/smp/upc_barrier.upc
===================================================================
--- libgupc/smp/upc_barrier.upc	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_barrier.upc	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,395 @@
+/* Copyright (C) 2001-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime Library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+/*
+ * UPC barrier implementation support routines.
+ *
+ * The UPC barrier synchronization statements are:
+ *  - upc_notify <i>expression</i>
+ *  - upc_wait <i>expression</i>
+ *  - upc_barrier <i>expression</i>
+ *
+ * The upc barrier statement is equivalent to the compound statement:
+ *   <i>{ upc_notify barrier_value; upc_wait barrier_value; }</i>
+ *
+ * The UPC runtime implementation of the barrier organizes the UPC
+ * threads in a form of a tree with a configurable tree fanout.  Each
+ * thread uses the following data structures:
+ *
+ * * A shared array of barrier block structures. Each thread has the
+ *   barrier block structure consisting of the following variables:
+ *   - notify    - Atomically incremented by the thread and its children
+ *               whenever they arrive on the notify statement.  Once all
+ *		 of them arrive, the parent of the thread is notified.
+ *   - wait      - Signaling field for parent to inform children that they
+ *		 are allowed to proceed from the wait phase.
+ *   - id[2]     - Barrier ID that thread is waiting on. There are two
+ *               barrier IDs to distinguish the correct notify/barrier
+ *               sequence (a thread can be in a notify phase while children
+ *               are still in the previous barrier wait state and need the
+ *		 parent's barrier ID to compare against their own).
+ * * A local array of per-thread notify counts required to complete the notify
+ *   phase (because an atomic fetch-and-add is used, the required number
+ *   of notifications is equal to the children count).
+ *
+ * BARRIER NOTIFY
+ *
+ *  * Each leaf thread atomically increments the 'notify' field of the parent's
+ *    barrier block.  Others atomically increment the same field in their own
+ *    barrier block.  The number of notifies before increment is returned back.
+ *  * If the number of notifies equals the thread's notify count, the parent
+ *    of the thread must be notified.  This propagates notification to the
+ *    top of the tree (no thread waits for anyone in the notify phase).  Before
+ *    parent is notified, the MAX barrier ID of the thread and its children
+ *    is calculated and set as the effective thread's barrier ID.  At the
+ *    end, the root thread has the MAX calculated ID for all threads.
+ *  * The last thread notifying the root thread is also responsible for
+ *    releasing the root thread from the wait.
+ *
+ * BARRIER WAIT
+ *
+ *  * Each parent (or another thread on behalf of the parent) atomically
+ *    increments the 'wait' field of its children (all threads completed
+ *    the notify phase).
+ *  * Each thread also atomically increments the 'wait' field in its own
+ *    barrier block (thread arrived on the wait statement).
+ *  * Whoever incremented the field first is responsible for further
+ *    releasing the thread's children.  This makes sure that all children of a
+ *    thread that has not arrived on the wait statement are allowed
+ *    to complete their wait statements (split phase barrier).
+ *
+ * Current limitations:
+ *   - Recursive behavior in the notify and wait statements can lead to
+ *     more work for some of the threads.
+ */
+
+#include <upc.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+/* Thread's children.  */
+static int *__upc_child;
+/* Thread's children count.  */
+static int __upc_child_cnt;
+/* Thread's parent thread.  */
+static int __upc_parent;
+
+/* Thread tree definitions.  */
+#define ROOT_PARENT	-1
+#define ROOT_NODE	0
+#define INNER_NODE	1
+#define LEAF_NODE	2
+#define ROOT_THREAD	(__upc_node == ROOT_NODE)
+#define INNER_THREAD	(__upc_node == INNER_NODE)
+#define LEAF_THREAD	(__upc_node == LEAF_NODE)
+int __upc_node;
+
+/* Notify counts for each thread.  */
+int *__upc_notify_cnt;
+
+/* Per thread barrier structure.  */
+struct barrier_block
+{
+  int notify;
+  int wait;
+  int id[2];
+};
+
+typedef struct barrier_block barrier_block_t;
+strict shared barrier_block_t __upc_btree[THREADS];
+/* Alternating barrier phase (even/odd).  Needed to distinguish barrier IDs
+   from two consecutive barriers, as some threads might enter the notify
+   statement while others have not yet completed the wait statement of the
+   previous barrier.  */
+static int __upc_bphase = 0;
+/* Atomic increment values.  */
+/* Thread arrived first on the wait (parent trying to release
+   children, or thread waiting for the parent).  */
+#define GUPCR_BARRIER_FIRST_ON_WAIT 0
+/* Both parent and thread arrived.  */
+#define GUPCR_BARRIER_WAIT_COMPLETED 2
+
+/* Per-thread flag set by upc_notify() and cleared by upc_wait().  */
+static GUPCR_THREAD_LOCAL int __upc_barrier_active = 0;
+
+/* Per-thread active barrier ID.  */
+GUPCR_THREAD_LOCAL int __upc_barrier_id = 0;
+
+/*
+ * Shared integer atomic increment.
+ *
+ */
+__attribute__ ((__always_inline__))
+static inline
+int
+__upc_atomic_inc (strict shared void *p)
+{
+  int *addr = __upc_map_to_local (p);
+  return __upc_sync_fetch_and_add (addr, 1);
+}
+
+/*
+ * Adjust thread's barrier ID.
+ *
+ * The MAX barrier ID among all threads is being propagated
+ * to the top of the tree. Adjust barrier ID of the thread
+ * to the MAX among the thread and its children.
+ *
+ */
+__attribute__ ((__always_inline__))
+static inline
+void
+__upc_adjust_barrier_id (int thread)
+{
+  int i, maxbid;
+  maxbid = __upc_btree[thread].id[__upc_bphase];
+  for (i = 0; i < GUPCR_TREE_FANOUT; i++)
+    {
+      int child = GUPCR_TREE_FANOUT * thread + i + 1;
+      if (child < THREADS)
+	{
+	  int bid = __upc_btree[child].id[__upc_bphase];
+	  if (maxbid < bid)
+	    maxbid = bid;
+	}
+    }
+  __upc_btree[thread].id[__upc_bphase] = maxbid;
+}
+
+/*
+ * Release waiting thread.
+ *
+ * Signal to the specified thread that it can complete
+ * the wait phase.
+ *
+ * This is a recursive function. If the specified thread did not
+ * arrive on the wait 'gate', the calling thread must release
+ * all its children with an atomic increment of their 'wait' fields.
+ *
+ */
+static inline
+void
+__upc_release_wait (int thread)
+{
+  int wait_cnt = __upc_atomic_inc (&__upc_btree[thread].wait);
+  if (wait_cnt == GUPCR_BARRIER_FIRST_ON_WAIT)
+    {
+      int i;
+      /* Parent arrived first.  Make the agreed-upon MAX barrier ID
+	 available to the children before releasing them.  */
+      if (INNER_THREAD)
+	__upc_btree[thread].id[__upc_bphase] =
+	  __upc_btree[(thread - 1) / GUPCR_TREE_FANOUT].id[__upc_bphase];
+      for (i = 0; i < GUPCR_TREE_FANOUT; i++)
+	{
+	  int child = GUPCR_TREE_FANOUT * thread + i + 1;
+	  if (child < THREADS)
+	    {
+	      __upc_release_wait (child);
+	    }
+	}
+    }
+}
+
+/*
+ * UPC notify statement implementation.
+ */
+void
+__upc_notify (int barrier_id)
+{
+  int notify_cnt;
+  int notify_thread;
+  if (__upc_barrier_active)
+    __upc_fatal ("Two successive upc_notify statements executed "
+		 "without an intervening upc_wait");
+  __upc_barrier_active = 1;
+  __upc_barrier_id = barrier_id;
+
+  /* Initialize thread's barrier block.  */
+  __upc_btree[MYTHREAD].id[__upc_bphase] = barrier_id;
+  __upc_btree[MYTHREAD].wait = 0;
+
+  /* Notify that thread arrived.  */
+  if (LEAF_THREAD)
+    notify_thread = __upc_parent;
+  else
+    notify_thread = MYTHREAD;
+  notify_cnt = __upc_atomic_inc (&__upc_btree[notify_thread].notify);
+  if (notify_cnt == __upc_notify_cnt[notify_thread])
+    {
+      /* Notify count reached the expected notification count (thread
+	 and all its children arrived on notification phase).
+	 Must traverse the tree and inform parent of the thread.  */
+      do
+	{
+	  __upc_btree[notify_thread].notify = 0;
+	  /* Adjust the barrier ID with the MAX of the
+	     thread and its children.  */
+	  __upc_adjust_barrier_id (notify_thread);
+	  if (notify_thread == 0)
+	    {
+	      /* Reached the top of the tree.  Release the root
+		 thread from the wait.  */
+	      __upc_release_wait (notify_thread);
+	      break;
+	    }
+	  /* The parent of the thread is the new thread that has
+	     to be notified.  */
+	  notify_thread = (notify_thread - 1) / GUPCR_TREE_FANOUT;
+	}
+      while (__upc_notify_cnt[notify_thread] ==
+	     __upc_atomic_inc (&__upc_btree[notify_thread].notify));
+    }
+}
+
+/*
+ * UPC wait statement implementation
+ */
+void
+__upc_wait (int barrier_id)
+{
+  int wait_cnt, i;
+
+  if (!__upc_barrier_active)
+    __upc_fatal ("upc_wait statement executed without a "
+		 "preceding upc_notify");
+  /* Check the barrier ID with the one from the notify phase.  */
+  if (barrier_id != INT_MIN && __upc_barrier_id != INT_MIN &&
+      __upc_barrier_id != barrier_id)
+    {
+      __upc_fatal ("UPC barrier identifier mismatch");
+    }
+
+  /* Announce the thread on the wait phase.  */
+  wait_cnt = __upc_atomic_inc (&__upc_btree[MYTHREAD].wait);
+  if (wait_cnt == GUPCR_BARRIER_FIRST_ON_WAIT)
+    {
+      /* Must wait for the parent.  */
+      int *wait_ptr = (int *) &__upc_btree[MYTHREAD].wait;
+      __upc_spin_until (*wait_ptr == GUPCR_BARRIER_WAIT_COMPLETED);
+    }
+
+  if (wait_cnt == GUPCR_BARRIER_FIRST_ON_WAIT)
+    {
+      /* Thread arrived before parent and waited for the release
+	 from the parent.  Release all the children from the wait
+	 and make the agreed-upon MAX barrier ID available to them.  */
+      if (INNER_THREAD)
+	__upc_btree[MYTHREAD].id[__upc_bphase] =
+	  __upc_btree[__upc_parent].id[__upc_bphase];
+      for (i = 0; i < __upc_child_cnt; i++)
+	__upc_release_wait (__upc_child[i]);
+    }
+
+  /* Compare barrier ID with parent's barrier ID.  */
+  if (barrier_id != INT_MIN)
+    {
+      int exp;
+      if (ROOT_THREAD)
+	exp = __upc_btree[MYTHREAD].id[__upc_bphase];
+      else
+        exp = __upc_btree[__upc_parent].id[__upc_bphase];
+      if (exp != INT_MIN && exp != barrier_id)
+        {
+	  __upc_fatal ("UPC barrier identifier mismatch");
+	}
+    }
+
+  __upc_barrier_active = 0;
+  if (__upc_bphase)
+    __upc_bphase = 0;
+  else
+    __upc_bphase = 1;
+  upc_fence;
+}
+
+/* 
+ * UPC barrier implementation.
+ */
+void
+__upc_barrier (int barrier_id)
+{
+  __upc_notify (barrier_id);
+  __upc_wait (barrier_id);
+}
+
+/*
+ * Initialize barrier.
+ *
+ * Initialize barrier data structures. A node tree is
+ * used to signal/ack thread's arrival on the barrier.
+ */
+void
+__upc_barrier_init (void)
+{
+  int i, thread;
+
+  /* Allocate space for children thread numbers.  */
+  __upc_child = malloc (GUPCR_TREE_FANOUT * sizeof (int));
+
+  /* Find all children of this thread. */
+  for (i = 0; i < GUPCR_TREE_FANOUT; i++)
+    {
+      int child = GUPCR_TREE_FANOUT * MYTHREAD + i + 1;
+      if (child < THREADS)
+	{
+	  __upc_child_cnt++;
+	  __upc_child[i] = child;
+	}
+    }
+  if (MYTHREAD == 0)
+    __upc_parent = ROOT_PARENT;
+  else
+    __upc_parent = (MYTHREAD - 1) / GUPCR_TREE_FANOUT;
+
+  /* Set the node assignment for this thread.  */
+  if (!MYTHREAD)
+    __upc_node = ROOT_NODE;
+  else if (__upc_child_cnt)
+    __upc_node = INNER_NODE;
+  else
+    __upc_node = LEAF_NODE;
+
+  /* Calculate notifications for each thread. Equal to children
+     count as atomic fetch and add is used.  */
+  __upc_notify_cnt = malloc (THREADS * sizeof (int));
+  if (!__upc_notify_cnt)
+    __upc_fatal
+      ("UPC barrier initialization failed - cannot allocate memory");
+  for (thread = 0; thread < THREADS; thread++)
+    {
+      __upc_notify_cnt[thread] = 0;
+      for (i = 0; i < GUPCR_TREE_FANOUT; i++)
+	{
+	  int child = GUPCR_TREE_FANOUT * thread + i + 1;
+	  if (child < THREADS)
+	    {
+	      __upc_notify_cnt[thread]++;
+	    }
+	}
+    }
+}
+
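
The parent/child relations used by the barrier above follow the usual
k-ary heap layout: the children of thread t are FANOUT*t + 1 through
FANOUT*t + FANOUT and its parent is (t - 1) / FANOUT.  A small standalone
sketch that prints the resulting tree (the fanout and thread count below
are only example values, standing in for GUPCR_TREE_FANOUT and THREADS):

#include <stdio.h>

#define FANOUT 4        /* stands in for GUPCR_TREE_FANOUT */

int
main (void)
{
  int threads = 10;     /* stands in for THREADS */
  for (int t = 0; t < threads; t++)
    {
      int parent = (t == 0) ? -1 : (t - 1) / FANOUT;
      printf ("thread %2d: parent %2d, children:", t, parent);
      for (int i = 0; i < FANOUT; i++)
        {
          int child = FANOUT * t + i + 1;
          if (child < threads)
            printf (" %d", child);
        }
      printf ("\n");
    }
  return 0;
}

With FANOUT = 4 and 10 threads, thread 0 has children 1-4, thread 1 has
children 5-8, thread 2 has child 9, and threads 3-9 are leaves.
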
Index: libgupc/smp/upc_castable.upc
===================================================================
--- libgupc/smp/upc_castable.upc	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_castable.upc	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,52 @@
+/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime Library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#include <upc.h>
+#include <upc_castable.h>
+
+void *
+upc_cast (const shared void *ptr)
+{
+  const size_t thread = upc_threadof ((shared void *) ptr);
+  void *local_ptr = NULL;
+  if (thread == (size_t) MYTHREAD)
+    {
+      local_ptr = (void *) ptr;
+    }
+  return local_ptr;
+}
+
+upc_thread_info_t
+upc_thread_info (size_t thread)
+{
+  upc_thread_info_t cast_info = { 0, 0 };
+  if (thread == (size_t) MYTHREAD)
+    {
+      cast_info.guaranteedCastable = UPC_CASTABLE_ALL;
+      cast_info.probablyCastable = UPC_CASTABLE_ALL;
+    }
+  return cast_info;
+}
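
A short usage sketch of the castability interface defined above; the
shared array and variable names are illustrative only:

#include <stdio.h>
#include <upc.h>
#include <upc_castable.h>

shared int data[THREADS];

int
main (void)
{
  /* A shared pointer with affinity to the calling thread can be cast
     down to an ordinary local pointer; for any other thread upc_cast
     returns NULL in this runtime.  */
  int *mine = upc_cast (&data[MYTHREAD]);
  if (mine)
    *mine = MYTHREAD;
  upc_thread_info_t info = upc_thread_info ((MYTHREAD + 1) % THREADS);
  printf ("thread %d: guaranteedCastable flags 0x%x\n",
          MYTHREAD, (unsigned) info.guaranteedCastable);
  return 0;
}
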
Index: libgupc/smp/upc_config.h
===================================================================
--- libgupc/smp/upc_config.h	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_config.h	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,270 @@
+/* Copyright (C) 2001-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime Library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef _UPC_CONFIG_H_
+#define _UPC_CONFIG_H_
+
+#include "config.h"
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+
+#if TIME_WITH_SYS_TIME
+# include <sys/time.h>
+# include <time.h>
+#else
+# if HAVE_SYS_TIME_H
+#  include <sys/time.h>
+# else
+#  include <time.h>
+# endif
+#endif
+
+#ifdef __sgi__
+#include <ulocks.h>
+#include <sys/sysmp.h>
+#endif
+
+#ifdef _POSIX_PRIORITY_SCHEDULING
+#ifndef __USE_GNU
+#define __USE_GNU
+#endif
+#include <sched.h>
+#endif
+
+#include <netdb.h>
+#include <netinet/in.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+
+#ifdef GUPCR_USE_PTHREADS
+#include <pthread.h>
+#define GUPCR_THREAD_LOCAL __thread
+#else
+#define GUPCR_THREAD_LOCAL
+#endif
+
+#define DEV_ZERO "/dev/zero"
+#define OFFSET_ZERO ((off_t) 0)
+/* Darwin has MAP_ANON defined for anonymous memory map */
+#if !MAP_ANONYMOUS && MAP_ANON
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+#define MAP_ERROR ((void *) -1)
+
+#define KILOBYTE 1024
+#define C64K (64*KILOBYTE)
+#define MEGABYTE (KILOBYTE*KILOBYTE)
+
+#ifndef LONG_LONG_BITS
+#define LONG_LONG_BITS (__SIZEOF_LONG_LONG__ * 8)
+#endif /* LONG_LONG_BITS */
+
+#ifndef SIZE_T_BITS
+#define SIZE_T_BITS (__SIZEOF_SIZE_T__ * 8)
+#endif /* SIZE_T_BITS */
+
+//begin detect_target64
+#if (defined(_LP64) && _LP64) \
+    || (defined(_MIPS_SZPTR) && (_MIPS_SZPTR == 64)) \
+    || (defined(_CRAYT3E))
+#define GUPCR_TARGET64 1
+#else
+#define GUPCR_TARGET64 0
+#endif
+//end detect_target64
+
+//begin mode_types
+typedef unsigned int u_intQI_t __attribute__ ((__mode__(__QI__)));
+typedef unsigned int u_intHI_t __attribute__ ((__mode__(__HI__)));
+typedef unsigned int u_intSI_t __attribute__ ((__mode__(__SI__)));
+typedef unsigned int u_intDI_t __attribute__ ((__mode__(__DI__)));
+#if GUPCR_TARGET64
+typedef unsigned int u_intTI_t __attribute__ ((__mode__(__TI__)));
+#endif /* GUPCR_TARGET64 */
+//end mode_types
+
+//begin lib_config_vm
+#if GUPCR_TARGET64
+/* On 64-bit machines, use page size of 32M (25 bits) and a max per thread
+   offset of 256G (38 bits).  This leaves 13 bits for the per thread
+   number of pages.  */
+#define GUPCR_VM_OFFSET_BITS 25 
+#if GUPCR_PTS_VADDR_SIZE > 38
+#define GUPCR_VM_MAX_PAGES_PER_THREAD \
+	(1 << (38 - GUPCR_VM_OFFSET_BITS))
+#else
+#define GUPCR_VM_MAX_PAGES_PER_THREAD \
+	(1 << (GUPCR_PTS_VADDR_SIZE - GUPCR_VM_OFFSET_BITS))
+#endif
+#else
+/* On 32-bit machines, use page size of 4M (22 bits) and a max per thread
+   offset of 4G (32 bits).  This leaves 10 bits for the per thread
+   number of pages.  */
+#define GUPCR_VM_OFFSET_BITS 22
+#if GUPCR_PTS_VADDR_SIZE > 32
+#define GUPCR_VM_MAX_PAGES_PER_THREAD \
+	(1 << (32 - GUPCR_VM_OFFSET_BITS))
+#else
+#define GUPCR_VM_MAX_PAGES_PER_THREAD \
+	(1 << (GUPCR_PTS_VADDR_SIZE - GUPCR_VM_OFFSET_BITS))
+#endif
+#endif /* GUPCR_TARGET64 */
+
+/* Derive some VM specific constants. */
+#define GUPCR_VM_PAGE_MASK (GUPCR_VM_MAX_PAGES_PER_THREAD - 1)
+#define GUPCR_VM_PAGE_SIZE (1 << GUPCR_VM_OFFSET_BITS)
+#define GUPCR_VM_OFFSET_MASK (GUPCR_VM_PAGE_SIZE - 1)
+/* Declare a type sufficiently large to hold a page number.
+   We can probably get by with a 'short' here, but it is
+   safer to just use a full 'int'.*/
+typedef unsigned int upc_page_num_t;
+
+/* Each thread caches a mapping between global page number
+   and local mapped address.  The global page number is
+   hashed into a global map cache, which is N-way associative,
+   where GUPCR_VM_GLOBAL_SET_SIZE defines the value of N.  */
+#define GUPCR_VM_GLOBAL_MAP_BITS 6 
+#define GUPCR_VM_GLOBAL_MAP_SIZE (1 << GUPCR_VM_GLOBAL_MAP_BITS)
+#define GUPCR_VM_GLOBAL_MAP_MASK (GUPCR_VM_GLOBAL_MAP_SIZE - 1)
+#define GUPCR_VM_GLOGAl_MAP_SET_SIZE 4
+/* All 1's for the virtual page number in a global map entry (GME)
+   indicates that the entry has not yet been mapped. */
+#define GUPCR_VM_PAGE_INVALID -1U
+//end lib_config_vm
+
+//begin lib_min_max
+#ifndef INT_MIN
+/* __INT_MAX__ is predefined by the gcc compiler */
+#  define INT_MIN (-__INT_MAX__ - 1)
+#endif
+
+/* helper functions */
+#define GUPCR_MIN(x,y) (((x) < (y)) ? (x): (y))
+#define GUPCR_MAX(x,y) (((x) > (y)) ? (x): (y))
+#define GUPCR_ABS(x) (((x) > 0) ? (x): -(x))
+#define GUPCR_ROUND(x, r) (((x) + (r) - 1)/(r)*(r))
+//end lib_min_max
+
+//begin lib_config_heap
+
+/* Maximum heap size.
+   Set here as 64 gigabytes on a 64-bit implementation
+   and 1 gigabyte on other (e.g., 32-bit) implementations.  */
+#define GUPCR_MAX_HEAP_SIZE (((sizeof (void *)*8) == 64) \
+                              ? (64L * KILOBYTE * MEGABYTE) \
+			      : ( 1L * KILOBYTE * MEGABYTE))
+
+/* Per-thread space initially allocated to UPC user's heap */
+#define GUPCR_DEFAULT_PER_THREAD_HEAP_SIZE (16*MEGABYTE)
+
+/* Per-thread maximum stack size that will be added to the OS's
+   default stack size, when creating pthreads.  */
+#define GUPCR_DEFAULT_PER_THREAD_STACK_SIZE (16*MEGABYTE)
+
+/* The minimum number of bytes to allocate */
+#define GUPCR_HEAP_ALLOC_MIN 64
+
+/* Heaps are increased by multiples of this chunk size.
+   The chunk size should be an even multiple of the UPC VM page size.  */
+#define GUPCR_HEAP_CHUNK_SIZE (1*GUPCR_VM_PAGE_SIZE)
+
+/* an unlikely barrier id to be used for runtime synchronization */
+#define GUPCR_RUNTIME_BARRIER_ID 0xBADF00D
+
+/* a value used to tag each heap allocated item, checked by upc_free */
+#define GUPCR_HEAP_ALLOC_TAG 0x0DDF00D
+//end lib_config_heap
+
+/* By default we let kernel schedule threads */
+#define GUPCR_SCHED_POLICY_DEFAULT GUPCR_SCHED_POLICY_AUTO
+#define GUPCR_MEM_POLICY_DEFAULT GUPCR_MEM_POLICY_AUTO
+
+/* Enable GUM debug support via this environment variable.  */
+#define GUM_DEBUG_ENV "GUM_DEBUG"
+
+/* Specify the GUM port as "host:port" via this environment variable.  */
+#define GUM_PORT_ENV "GUM_PORT"
+
+/* Use this environment variable to specify the time (in seconds) that
+   the UPC process should delay to give gdbserver
+   a chance to attach to it.  */
+#define GUM_ATTACH_DELAY_ENV "GUM_ATTACH_DELAY"
+
+/* Specify the full pathname of gdbsever via this environment variable.  */
+#define GUM_GDBSERVERPATH_ENV "GDBSERVERPATH"
+
+/* Default GUM host */
+#define GUM_HOST_DEFAULT "localhost"
+
+/* Default GUM port */
+#define GUM_PORT_DEFAULT 1234
+
+/* Default gdbserver attach delay (in seconds)  */
+#define GUM_ATTACH_DELAY_DEFAULT 10
+
+/* GUM initialization routine called at start up */
+extern void __upc_gum_init (int, int);
+
+/*
+ * Main entry for UPC programs.
+ * The runtime will execute before calling the user's main
+ * program.  Thus, the user's main program will be renamed
+ * inside the <upc.h> file to 'upc_main'.
+ */
+#define GUPCR_START main
+#define GUPCR_MAIN upc_main
+
+//begin lib_config_shared_section
+
+/* The base address of the UPC shared section */
+#define GUPCR_SHARED_SECTION_START __upc_shared_start
+/* The ending address (plus one) of the UPC shared section */
+#define GUPCR_SHARED_SECTION_END __upc_shared_end
+
+/* The base address of the UPC compiled program info. section */
+#define GUPCR_PGM_INFO_SECTION_START __upc_pgm_info_start
+/* The ending address (plus one) of the UPC compiled program info. section */
+#define GUPCR_PGM_INFO_SECTION_END __upc_pgm_info_end
+
+/* The base address of an array of pointers to UPC initialization routines.  */
+#define GUPCR_INIT_ARRAY_START __upc_init_array_start
+/* The ending address (plus one) of pointers to UPC initialization routines */
+#define GUPCR_INIT_ARRAY_END   __upc_init_array_end
+
+//end lib_config_shared_section
+
+#endif /* _UPC_CONFIG_H_ */
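
To make the lib_config_vm arithmetic above concrete: on a 64-bit target
the derived constants work out as below.  GUPCR_PTS_VADDR_SIZE is taken
to be 38 here, matching the comment above; the real value comes from the
configuration.  A small standalone check:

#include <stdio.h>

#define VM_OFFSET_BITS 25                 /* 32 MB pages (64-bit case)  */
#define PTS_VADDR_SIZE 38                 /* 256 GB per-thread offset   */
#define VM_PAGE_SIZE (1 << VM_OFFSET_BITS)
#define VM_MAX_PAGES_PER_THREAD (1 << (PTS_VADDR_SIZE - VM_OFFSET_BITS))

int
main (void)
{
  printf ("page size:            %d bytes\n", VM_PAGE_SIZE);
  printf ("max pages per thread: %d\n", VM_MAX_PAGES_PER_THREAD);
  printf ("per-thread space:     %lld bytes\n",
          (long long) VM_MAX_PAGES_PER_THREAD * VM_PAGE_SIZE);
  return 0;
}

This prints a 33554432-byte (32 MB) page, 8192 pages per thread, and
274877906944 bytes (256 GB) of per-thread shared space, matching the
13 bits left for the per-thread page number.
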
Index: libgupc/smp/upc_debug.c
===================================================================
--- libgupc/smp/upc_debug.c	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_debug.c	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,74 @@
+/* Copyright (C) 2003-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#include "upc_debug.h"
+
+/* MPIR Interface support for debugging.
+   http://www.mpi-forum.org/docs/mpir-specification-10-11-2010.pdf
+
+   MPIR_being_debugged is set to 1 by the debugger.
+   As we support MPIR_partial_attach_ok (section 9.13), all
+   threads are in hold mode and continue to run only after the
+   debugger continues the monitor thread from MPIR_Breakpoint().
+   If the debugger wants to attach to any of the threads, the
+   thread's gate must be lowered by the debugger.  */
+   
+MPIR_PROCDESC *MPIR_proctable = 0;
+int MPIR_proctable_size = 0;
+const char *MPIR_debug_abort_string = 0;
+volatile int MPIR_debug_state;
+volatile int MPIR_debug_gate = 1; /* Threads continue to run by default.  */
+int MPIR_being_debugged;	  /* Set by the debugger.  */
+int MPIR_partial_attach_ok;	  /* OK to attach to subset of threads.  */
+
+/* Debugging breakpoint.
+   Subroutine called by the starter process to notify the debugger
+   that an MPIR event has occurred.  */
+void
+MPIR_Breakpoint (void)
+{
+}
+
+/* Tell the debugger that this initial process is not to be
+   included in the set of processes which form the UPC program.  */
+void
+MPIR_i_am_starter (void)
+{
+}
+
+/* Tell the debugger that we're not really MPI after all.  */
+void
+MPIR_ignore_queues (void)
+{
+}
+
+/* Tell the debugger to display "main" if we stop immediately
+   after acquiring the processes at startup time.  */
+void
+MPIR_force_to_main (void)
+{
+}
+
Index: libgupc/smp/upc_debug.h
===================================================================
--- libgupc/smp/upc_debug.h	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_debug.h	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,96 @@
+/* Copyright (C) 2003-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+
+/*
+ * This file defines an interface that allows a debugger to easily
+ * acquire all of the UPC processes at startup time.
+ *
+ * It reuses the interface defined by MPICH
+ * (http://www-unix.mcs.anl.gov/mpi/mpich/) for communicating with debuggers.
+ */
+
+#ifndef _UPC_DEBUG_H_
+#define _UPC_DEBUG_H_
+
+/**************************************************************************
+ * These functions are our interface to the debugger.
+ */
+
+/* A small struct that holds the target host name and pid for
+ * each process which forms part of the MPI program.
+ *
+ * For UPC we probably don't need the host_name or executable_name,
+ * but it's easier to use the existing interface than to leave
+ * them out.  Provided we zero them, the debugger will assume they're the
+ * same as the initial program.
+ *
+ * DO NOT change the name of this structure or its fields.  The debugger
+ * knows them, and will be confused if you change them.
+ */
+typedef struct 
+{
+  char * host_name;           /* Something we can pass to inet_addr */
+  char * executable_name;     /* The name of the image */
+  int    pid;		      /* The pid of the process */
+} MPIR_PROCDESC;
+
+/* Array of procdescs for debugging purposes */
+extern MPIR_PROCDESC *MPIR_proctable;
+extern int MPIR_proctable_size;
+
+/* Various global variables which a debugger can use to
+ * 1) find out what the state of the program is at
+ *    the time the magic breakpoint is hit, and
+ * 2) inform the process that it has been attached to and is
+ *    now free to run.
+ */
+extern volatile int MPIR_debug_state;
+extern volatile int MPIR_debug_gate;
+extern const char * MPIR_debug_abort_string;
+extern int          MPIR_being_debugged; /* Cause extra info on internal state
+					  * to be maintained
+					  */
+ 
+/* Values for MPIR_debug_state; this seems to be all we need at the
+ * moment, but that may change...
+ */
+#define MPIR_DEBUG_SPAWNED   1
+#define MPIR_DEBUG_ABORTING  2
+
+/* A function we call to tell the debugger that something worthwhile is happening.
+ */
+extern void MPIR_Breakpoint (void);
+
+/*
+ * Other functions whose mere presence in the executable provides information to the
+ * debugger.
+ */
+extern void MPIR_i_am_starter (void);
+extern void MPIR_ignore_queues (void);
+extern void MPIR_force_to_main (void);
+
+#endif /* _UPC_DEBUG_H_ */
Index: libgupc/smp/upc_defs.h
===================================================================
--- libgupc/smp/upc_defs.h	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_defs.h	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,171 @@
+/* Copyright (C) 2001-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime Library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef _UPC_DEFS_H_
+#define _UPC_DEFS_H_
+
+#include "upc_pts.h"
+
+//begin lib_max_threads_def
+/* Maximum number of THREADS supported in this implementation */
+#define GUPCR_THREAD_SIZE 12
+#define GUPCR_THREADS_MAX (1 << GUPCR_THREAD_SIZE)
+//end lib_max_threads_def
+
+#if GUPCR_PTS_PACKED_REP && (GUPCR_THREAD_SIZE > GUPCR_PTS_THREAD_SIZE)
+#error GUPCR_THREADS_MAX exceeds the size of the packed sptr threads field.
+#endif
+
+
+/* UPC thread-specific information */
+typedef struct upc_thread_info_struct
+  {
+    pid_t pid;
+    int sched_affinity;
+    int mem_affinity;
+#ifdef GUPCR_USE_PTHREADS
+    pthread_t os_thread;
+    int exit_status;
+#endif
+  } upc_thread_info_t;
+typedef upc_thread_info_t *upc_thread_info_p;
+
+#define GUPCR_PROCBITS_PER_WORD OS_BITS_PER_ATOMIC_WORD
+
+#define GUPCR_NUM_PROCBIT_WORDS ((GUPCR_THREADS_MAX + (GUPCR_PROCBITS_PER_WORD - 1)) \
+			    / GUPCR_PROCBITS_PER_WORD)
+
+/* Bit vector used to manage processes */
+typedef os_atomic_t upc_procbits_vec_t[GUPCR_NUM_PROCBIT_WORDS];
+
+/* UPC thread barrier ID  */
+extern GUPCR_THREAD_LOCAL int __upc_barrier_id;
+
+/* There is one global page table per UPC program.
+   The global page table maps (thread, page) into
+   a global page number in the global memory region. */
+typedef upc_page_num_t upc_pte_t;
+typedef upc_pte_t *upc_pte_p;
+
+/* scheduling policies */
+enum upc_sched_policy_enum
+  {
+    GUPCR_SCHED_POLICY_AUTO,	/* kernel's scheduling policy */
+    GUPCR_SCHED_POLICY_NODE,	/* schedule across nodes */
+    GUPCR_SCHED_POLICY_CPU,	/* schedule across cpus - multiple threads per CPU */
+    GUPCR_SCHED_POLICY_CPU_STRICT	/* schedule across cpus - one thread per CPU */
+  };
+typedef enum upc_sched_policy_enum upc_sched_policy_t;
+
+/* Non-Uniform Memory Access (NUMA) allocation policies */
+enum upc_mem_policy_enum
+  {
+    GUPCR_MEM_POLICY_AUTO,	/* kernel's default NUMA policy */
+    GUPCR_MEM_POLICY_NODE,	/* allocate memory from the local node first */
+    GUPCR_MEM_POLICY_STRICT	/* allocate memory from the local node only */
+  };
+typedef enum upc_mem_policy_enum upc_mem_policy_t;
+
+/* Data structure used to keep track of CPUs that must be avoided */
+typedef struct upc_cpu_avoid_struct upc_cpu_avoid_t;
+typedef upc_cpu_avoid_t *upc_cpu_avoid_p;
+
+/* UPC system-wide information */
+typedef struct upc_info_struct
+  {
+    char *program_name;
+    char *host_name;
+    pid_t monitor_pid;
+    int partial_attach_start;
+    os_heap_p runtime_heap;
+    os_lock_t lock;
+    upc_page_num_t init_page_alloc;
+    upc_shared_ptr_t init_heap_base;
+    size_t init_heap_size;
+    int smem_fd;
+    char *mmap_file_name;
+    upc_pte_p gpt;
+    upc_page_num_t cur_page_alloc;
+    upc_shared_ptr_t all_lock;
+    upc_thread_info_t thread_info[GUPCR_THREADS_MAX];
+    int num_cpus;
+    int num_nodes;
+    upc_sched_policy_t sched_policy;
+    upc_mem_policy_t mem_policy;
+  } upc_info_t;
+typedef upc_info_t *upc_info_p;
+
+/* system wide info */
+extern upc_info_p __upc_info;
+
+/* The filename of the location where a runtime
+   error was detected.  This is set by the various
+   debug-enabled ('g') UPC runtime library routines.  */
+extern GUPCR_THREAD_LOCAL const char *__upc_err_filename;
+
+/* The line number of the location where a runtime
+   error was detected.  This is set by the various
+   debug-enabled ('g') UPC runtime library routines.  */
+extern GUPCR_THREAD_LOCAL unsigned int __upc_err_linenum;
+
+#define GUPCR_SET_ERR_LOC() \
+  do \
+    { \
+      __upc_err_filename = filename; \
+      __upc_err_linenum  = linenum; \
+    } while (0)
+
+#define GUPCR_CLEAR_ERR_LOC() \
+  do \
+    { \
+      __upc_err_filename = NULL; \
+      __upc_err_linenum  = 0; \
+    } while (0)
+
+/* The base address of the UPC shared section */
+extern char GUPCR_SHARED_SECTION_START[1];
+
+/* The ending address (plus one) of the UPC shared section */
+extern char GUPCR_SHARED_SECTION_END[1];
+
+/* The base address of the UPC compiled program info. section */
+extern char GUPCR_PGM_INFO_SECTION_START[1];
+
+/* The ending address (plus one) of the UPC compiled program info. section */
+extern char GUPCR_PGM_INFO_SECTION_END[1];
+
+/* The value of THREADS when defined at run time */
+extern int THREADS;
+
+/* Current thread id */
+extern GUPCR_THREAD_LOCAL int MYTHREAD;
+
+#ifdef GUPCR_USE_PTHREADS
+/* The value of UPC_PTHREADS when defined at run time */
+extern int UPC_PTHREADS;
+#endif
+
+#endif /* _UPC_DEFS_H_ */
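
For reference (not part of the patch): a sketch of how the
GUPCR_SET_ERR_LOC / GUPCR_CLEAR_ERR_LOC macros above are intended to be
used.  The macros bind to local `filename' and `linenum' parameters; the
entry point name example_debug_entry is hypothetical.

#include "upc_config.h"
#include "upc_sysdep.h"
#include "upc_defs.h"

/* A debug-enabled ('g') runtime entry point records the source location
   of the access so that any runtime error can report it.  */
void
example_debug_entry (const char *filename, unsigned int linenum)
{
  GUPCR_SET_ERR_LOC ();
  /* ... perform the checked shared-memory access here; on error the
     runtime can report __upc_err_filename and __upc_err_linenum ...  */
  GUPCR_CLEAR_ERR_LOC ();
}
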
Index: libgupc/smp/upc_gasp.c
===================================================================
--- libgupc/smp/upc_gasp.c	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_gasp.c	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,83 @@
+/* Copyright (C) 2009-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+
+#include "upc_config.h"
+#include "upc_sysdep.h"
+#include "upc_defs.h"
+#include "gasp.h"
+
+/* Since libgupc contains references to these functions, we provide dummy
+   implementations to prevent linker warnings when GASP support has been
+   compiled into GNU UPC, but the user compiles their application without
+   a GASP tool.  We define these as weak symbols so that tools can
+   override them appropriately.  */
+
+#pragma weak gasp_init
+#pragma weak gasp_event_notify
+#pragma weak gasp_event_notifyVA
+#pragma weak gasp_control
+#pragma weak gasp_create_event
+
+gasp_context_t
+gasp_init (gasp_model_t ARG_UNUSED (srcmodel),
+	   int *ARG_UNUSED (argc), char ***ARG_UNUSED (argv))
+{
+  return 0;
+}
+
+void
+gasp_event_notify (gasp_context_t ARG_UNUSED (context),
+		   unsigned int ARG_UNUSED (evttag),
+		   gasp_evttype_t ARG_UNUSED (evttype),
+		   const char *ARG_UNUSED (filename),
+		   int ARG_UNUSED (linenum), int ARG_UNUSED (colnum), ...)
+{
+}
+
+void
+gasp_event_notifyVA (gasp_context_t ARG_UNUSED (context),
+		     unsigned int ARG_UNUSED (evttag),
+		     gasp_evttype_t ARG_UNUSED (evttype),
+		     const char *ARG_UNUSED (filename),
+		     int ARG_UNUSED (linenum),
+		     int ARG_UNUSED (colnum), va_list ARG_UNUSED (varargs))
+{
+}
+
+int
+gasp_control (gasp_context_t ARG_UNUSED (context), int ARG_UNUSED (on))
+{
+  return 0;
+}
+
+unsigned int
+gasp_create_event (gasp_context_t ARG_UNUSED (context),
+		   const char *ARG_UNUSED (name),
+		   const char *ARG_UNUSED (desc))
+{
+  return 0;
+}
Index: libgupc/smp/upc_gum.c
===================================================================
--- libgupc/smp/upc_gum.c	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_gum.c	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,197 @@
+/* Copyright (C) 2008-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+
+#include "upc_config.h"
+
+#define GUM_MAX_BUF 1024
+#define GUM_CONDATA_FMT "Host %s PID %d MYTHREAD %d THREADS %d"
+#define GUM_GDBSDATA_FMT "Host %s port %d"
+
+void
+__upc_gum_init (int nthreads, int thread_id)
+{
+  const char *gum_host = GUM_HOST_DEFAULT;
+  int gum_port = GUM_PORT_DEFAULT;
+  const char *gdbserver = "gdbserver";
+  const char *gum_port_env = getenv (GUM_PORT_ENV);
+  const char *gum_gdbserverpath_env = getenv (GUM_GDBSERVERPATH_ENV);
+  const char *gum_attach_delay_env = getenv (GUM_ATTACH_DELAY_ENV);
+  int attach_delay = GUM_ATTACH_DELAY_DEFAULT;
+  int mypid = getpid ();
+  char myhost[GUM_MAX_BUF];
+  char hostname[GUM_MAX_BUF];
+  struct hostent *hostent;
+  struct sockaddr_in sockaddr;
+  int gum_sock_fd;
+  FILE *gum_in, *gum_out;
+  char gum_reply[GUM_MAX_BUF];
+  int gum_reply_len;
+  char gum_mux_host[GUM_MAX_BUF];
+  int gum_mux_port;
+  char gum_mux_connect[GUM_MAX_BUF];
+  int gdbserver_pid;
+  if (gethostname (myhost, sizeof (myhost)) != 0)
+    {
+       perror ("gethostname");
+       abort ();
+    }
+  if (gum_port_env)
+    {
+      const char *p = gum_port_env;
+      size_t hostlen;
+      while (*p && *p != ':')
+	++p;
+      if (!*p)
+	{
+	  fprintf (stderr,
+		   "Missing separator in %s environment variable: %s\n",
+		   GUM_PORT_ENV, gum_port_env);
+	  exit (2);
+	}
+      hostlen = (p - gum_port_env);
+      if (!hostlen)
+	{
+	  fprintf (stderr,
+		   "empty host name in `%s' environment variable: `%s'\n",
+		   GUM_PORT_ENV, gum_port_env);
+	  exit (2);
+	}
+      if (hostlen > (sizeof (hostname) - 1))
+	{
+	  fprintf (stderr,
+		   "host name in `%s' environment variable is too long: `%s'\n",
+		   GUM_PORT_ENV, gum_port_env);
+	  exit (2);
+	}
+      strncpy (hostname, gum_port_env, hostlen);
+      hostname[hostlen] = '\0';
+      gum_host = (const char *) hostname;
+      p = p + 1;
+      gum_port = atoi (p);
+      if (!gum_port)
+	{
+	  fprintf (stderr,
+		   "Invalid port number in %s environment variable: %s\n",
+		   GUM_PORT_ENV, gum_port_env);
+	  exit (2);
+	}
+    }
+  if (gum_gdbserverpath_env)
+    {
+      struct stat statbuf;
+      if (stat (gum_gdbserverpath_env, &statbuf) != 0)
+	{
+	  fprintf (stderr,
+		   "Cannot locate gdbserver via environment variable %s: %s\n",
+		   GUM_GDBSERVERPATH_ENV, gum_gdbserverpath_env);
+	  exit (2);
+	}
+      gdbserver = gum_gdbserverpath_env;
+    }
+  if (gum_attach_delay_env)
+    {
+      attach_delay = atoi (gum_attach_delay_env);
+    }
+  hostent = gethostbyname (gum_host);
+  if (!hostent)
+    {
+      fprintf (stderr, "%s: unknown GUM host\n", gum_host);
+      exit (2);
+    }
+  gum_sock_fd = socket (PF_INET, SOCK_STREAM, 0);
+  if (gum_sock_fd < 0)
+    {
+      perror ("Can't create GUM socket");
+      abort ();
+    }
+  sockaddr.sin_family = PF_INET;
+  sockaddr.sin_port = htons (gum_port);
+  memcpy (&sockaddr.sin_addr.s_addr, hostent->h_addr,
+	  sizeof (struct in_addr));
+  if (connect (gum_sock_fd, (struct sockaddr *) &sockaddr, sizeof (sockaddr))
+      < 0)
+    {
+      perror ("Can't connect to GUM host");
+      abort ();
+    }
+  gum_in = fdopen (gum_sock_fd, "r");
+  if (!gum_in)
+    {
+      perror ("fdopen of gum_in failed");
+      abort ();
+    }
+  setlinebuf (gum_in);
+  gum_out = fdopen (gum_sock_fd, "w");
+  if (!gum_out)
+    {
+      perror ("fdopen of gum_out failed");
+      abort ();
+    }
+  setlinebuf (gum_out);
+  fprintf (gum_out, GUM_CONDATA_FMT, myhost, mypid, thread_id, nthreads);
+  fflush (gum_out);
+  if (!fgets (gum_reply, sizeof (gum_reply), gum_in))
+    {
+      fprintf (stderr, "Can't read GUM reply\n");
+      exit (2);
+    }
+  fclose (gum_in);
+  fclose (gum_out);
+  close (gum_sock_fd);
+  gum_reply_len = strlen (gum_reply);
+  if (gum_reply_len && gum_reply[gum_reply_len - 1] == '\n')
+    gum_reply[--gum_reply_len] = '\0';
+  if (sscanf (gum_reply, GUM_GDBSDATA_FMT, gum_mux_host, &gum_mux_port) != 2)
+    {
+      fprintf (stderr, "%d: invalid GUM reply: %s\n", mypid, gum_reply);
+      exit (2);
+    }
+  if (snprintf (gum_mux_connect, sizeof (gum_mux_connect), "%s:%d",
+		gum_mux_host, gum_mux_port) >= (int) sizeof (gum_mux_connect))
+    {
+      fprintf (stderr, "%d: GUM mux connect buffer exceeds size\n", mypid);
+      exit (2);
+    }
+  if ((gdbserver_pid = fork ()) > 0)
+    {
+      /* Give gdbserver a chance to connect to us.  */
+      sleep (attach_delay);
+    }
+  else if (!gdbserver_pid)
+    {
+      char mypidstr[12];
+      sprintf (mypidstr, "%d", mypid);
+      execl (gdbserver, gdbserver, gum_mux_connect, "--attach", mypidstr, NULL);
+      perror ("gdbserver exec failed");
+      abort ();
+    }
+  else
+    {
+      perror ("fork of gdbserver failed");
+      abort ();
+    }
+}
Index: libgupc/smp/upc_libat_lock.c
===================================================================
--- libgupc/smp/upc_libat_lock.c	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_libat_lock.c	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,61 @@
+/* Copyright (C) 2014-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime Library.
+   It is derived from libatomic/config/posix/lock.c.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#include "upc_config.h"
+#include "upc_sysdep.h"
+#include "upc_defs.h"
+#include "upc_libat_lock.h"
+
+/* Locking for a typical-sized operation.  */
+void
+libat_lock_1 (void *ARG_UNUSED (ptr))
+{
+  upc_info_p u = __upc_info;
+  __upc_acquire_lock (&u->lock);
+}
+
+void
+libat_unlock_1 (void *ARG_UNUSED (ptr))
+{
+  upc_info_p u = __upc_info;
+  __upc_release_lock (&u->lock);
+}
+
+/* Locking for a "large" operation.  This should always be some sort of
+   test-and-set operation, as we assume that the interrupt latency would
+   be unreasonably large.  */
+void
+libat_lock_n (void *ARG_UNUSED (ptr), size_t ARG_UNUSED (n))
+{
+  upc_info_p u = __upc_info;
+  __upc_acquire_lock (&u->lock);
+}
+
+void
+libat_unlock_n (void *ARG_UNUSED (ptr), size_t ARG_UNUSED (n))
+{
+  upc_info_p u = __upc_info;
+  __upc_release_lock (&u->lock);
+}
Index: libgupc/smp/upc_libat_lock.h
===================================================================
--- libgupc/smp/upc_libat_lock.h	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_libat_lock.h	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,39 @@
+/* Copyright (C) 2014-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime Library.
+   It is derived from libatomic/config/posix/lock.c.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef _UPC_LIBAT_LOCK_H_
+#define _UPC_LIBAT_LOCK_H_
+
+/* Locking for a typical-sized operation.  */
+extern void libat_lock_1 (void *ptr);
+extern void libat_unlock_1 (void *ptr);
+
+/* Locking for a "large" operation.  This should always be some sort of
+   test-and-set operation, as we assume that the interrupt latency would
+   be unreasonably large.  */
+extern void libat_lock_n (void *ptr, size_t n);
+extern void libat_unlock_n (void *ptr, size_t n);
+
+#endif /* !_UPC_LIBAT_LOCK_H_ */
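
For reference (not part of the patch): a sketch of the pattern these
hooks support.  When no hardware atomic of a given size is available, the
operation is bracketed by libat_lock_n/libat_unlock_n, which here map
onto the single runtime-wide lock.  example_locked_exchange is
hypothetical.

#include <stddef.h>
#include <string.h>
#include "upc_libat_lock.h"

/* Exchange N bytes at PTR with VAL, returning the old value in RET,
   under the runtime-wide lock.  */
static void
example_locked_exchange (void *ptr, void *val, void *ret, size_t n)
{
  libat_lock_n (ptr, n);
  memcpy (ret, ptr, n);   /* Read the old value ...  */
  memcpy (ptr, val, n);   /* ... and store the new one.  */
  libat_unlock_n (ptr, n);
}
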


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]