This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[UPC 20/22] libgupc runtime library [3/9]


[NOTE: Due to email list size limits, this patch is broken into 9 parts.]

Background
----------

An overview email describing the UPC-related changes is here:
  https://gcc.gnu.org/ml/gcc-patches/2015-12/msg00005.html

The GUPC branch is described here:
  http://gcc.gnu.org/projects/gupc.html

The UPC-related source code differences are summarized here:
  http://gccupc.org/gupc-changes

All languages (c, c++, fortran, go, lto, objc, obj-c++) have been
bootstrapped; no test suite regressions were introduced,
relative to the GCC trunk.

If you are on the cc-list, your name was chosen either
because you are listed as a maintainer for the area that
applies to the patches described in this email, or you
were a frequent contributor of patches made to files listed
in this email.

In the change log entries included in each patch, the directory
containing the affected files is listed, followed by the files.
When the patches are applied, the change log entries will be
distributed to the appropriate ChangeLog file.

Overview
--------

Libgupc is the UPC runtime library, for GUPC.  The configuration,
makefile, and documentation related changes have been broken out into
separate patches.

As noted in the ChangeLog entry below, this is all new code.
Two communication layers are supported: (1) SMP, via 'mmap'
or (2) the Portals4 library API, which supports multi-node
operation.  Libgupc generally requires a POSIX-compliant target OS.

The 'smp' runtime is the default runtime.  The 'portals4'
runtime is experimental; it supports multi-node operation
using the Portals4 communications library.

Most of the libgupc/include/ directory contains standard headers
defined by the UPC language specification. 'make install' will
install these headers in the directory where other "C"
header files are located.

2015-11-30  Gary Funck  <gary@intrepid.com>

	libgupc/smp/
	* upc_lib.h: New.
	* upc_libg.c: New.
	* upc_lock.h: New.
	* upc_lock.upc: New.
	* upc_lock_sup.c: New.
	* upc_lock_sup.h: New.
	* upc_main.c: New.
	* upc_mem.c: New.
	* upc_mem.h: New.
	* upc_nb.upc: New.
	* upc_numa.c: New.
	* upc_numa.h: New.
	* upc_numa_stub.c: New.
	* upc_pgm_info.c: New.
	* upc_pts.h: New.
	* upc_pupc.c: New.
	* upc_pupc.h: New.
	* upc_sup.h: New.
	* upc_sync.h: New.
	* upc_sysdep.c: New.
	* upc_sysdep.h: New.
	* upc_tick.c: New.
	* upc_vm.c: New.

Index: libgupc/smp/upc_lib.h
===================================================================
--- libgupc/smp/upc_lib.h	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_lib.h	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,68 @@
+/* Copyright (C) 2006-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+
+#ifndef _UPC_LIB_H_
+#define _UPC_LIB_H_
+
+/* Definition of user-visible UPC library routines,
+   in a form that they can be called from the
+   "C"-based runtime.  */
+
+extern size_t upc_threadof (upc_shared_ptr_t);
+extern size_t upc_phaseof (upc_shared_ptr_t);
+extern upc_shared_ptr_t upc_resetphase (upc_shared_ptr_t);
+extern size_t upc_addrfield (upc_shared_ptr_t);
+extern size_t upc_affinitysize (size_t, size_t, size_t);
+
+extern void upc_global_exit (int);
+
+extern void upc_memcpy (upc_shared_ptr_t dest, upc_shared_ptr_t src,
+			size_t n);
+extern void upc_memget (void *dest, upc_shared_ptr_t src, size_t n);
+extern void upc_memput (upc_shared_ptr_t dest, const void *src, size_t n);
+extern void upc_memset (upc_shared_ptr_t dest, int c, size_t n);
+
+extern upc_shared_ptr_t upc_global_alloc (size_t, size_t);
+extern upc_shared_ptr_t upc_all_alloc (size_t, size_t);
+extern upc_shared_ptr_t upc_alloc (size_t);
+extern void upc_free (upc_shared_ptr_t);
+extern void upc_all_free (upc_shared_ptr_t);
+
+extern upc_shared_ptr_t upc_lock_alloc (void);
+extern void upc_lock_free (upc_shared_ptr_t);
+extern void upc_all_lock_free (upc_shared_ptr_t);
+extern upc_shared_ptr_t upc_all_lock_alloc (void);
+extern upc_shared_ptr_t upc_global_lock_alloc (void);
+extern void upc_lock (upc_shared_ptr_t);
+extern int upc_lock_attempt (upc_shared_ptr_t);
+extern void upc_unlock (upc_shared_ptr_t);
+
+typedef uint64_t upc_tick_t;
+extern upc_tick_t upc_ticks_now(void);
+extern uint64_t upc_ticks_to_ns(upc_tick_t ticks);
+
+#endif /* _UPC_LIB_H_ */
Index: libgupc/smp/upc_libg.c
===================================================================
--- libgupc/smp/upc_libg.c	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_libg.c	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,251 @@
+/* Copyright (C) 2009-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+
+#include "upc_config.h"
+#include "upc_sysdep.h"
+#include "upc_defs.h"
+#include "upc_sup.h"
+#include "upc_lib.h"
+#include "gasp_upc.h"
+#include "upc_pupc.h"
+
+void
+__upc_barrierg (int barrier_id, const char *filename, const int linenum)
+{
+  const int is_named = (barrier_id != INT_MIN);	/* INT_MIN: unnamed.  */
+  p_start (GASP_UPC_BARRIER, is_named, barrier_id);
+  GUPCR_SET_ERR_LOC ();		/* Presumably uses filename/linenum.  */
+  __upc_barrier (barrier_id);	/* The wrapped runtime barrier.  */
+  GUPCR_CLEAR_ERR_LOC ();
+  p_end (GASP_UPC_BARRIER, is_named, barrier_id);
+}
+
+void
+__upc_notifyg (int barrier_id, const char *filename, const int linenum)
+{
+  const int is_named = (barrier_id != INT_MIN);	/* INT_MIN: unnamed.  */
+  p_start (GASP_UPC_NOTIFY, is_named, barrier_id);
+  GUPCR_SET_ERR_LOC ();		/* Presumably uses filename/linenum.  */
+  __upc_notify (barrier_id);	/* The wrapped runtime notify.  */
+  GUPCR_CLEAR_ERR_LOC ();
+  p_end (GASP_UPC_NOTIFY, is_named, barrier_id);
+}
+
+void
+__upc_waitg (int barrier_id, const char *filename, const int linenum)
+{
+  const int is_named = (barrier_id != INT_MIN);	/* INT_MIN: unnamed.  */
+  p_start (GASP_UPC_WAIT, is_named, barrier_id);
+  GUPCR_SET_ERR_LOC ();		/* Presumably uses filename/linenum.  */
+  __upc_wait (barrier_id);	/* The wrapped runtime wait.  */
+  GUPCR_CLEAR_ERR_LOC ();
+  p_end (GASP_UPC_WAIT, is_named, barrier_id);
+}
+
+upc_shared_ptr_t
+upc_global_lock_allocg (const char *filename, int linenum)
+{
+  upc_shared_ptr_t new_lock;	/* Passed to p_end by address.  */
+  p_start (GASP_UPC_GLOBAL_LOCK_ALLOC);
+  GUPCR_SET_ERR_LOC ();		/* Presumably uses filename/linenum.  */
+  new_lock = upc_global_lock_alloc ();	/* The wrapped library call.  */
+  GUPCR_CLEAR_ERR_LOC ();
+  p_end (GASP_UPC_GLOBAL_LOCK_ALLOC, &new_lock);
+  return new_lock;
+}
+
+void
+upc_all_lock_freeg (upc_shared_ptr_t ptr, const char *filename, int linenum)
+{
+  p_start (GASP_UPC_LOCK_FREE, &ptr);	/* Same event tag as upc_lock_freeg.  */
+  GUPCR_SET_ERR_LOC();			/* Presumably uses filename/linenum.  */
+  upc_all_lock_free(ptr);		/* The wrapped collective free.  */
+  GUPCR_CLEAR_ERR_LOC();
+  p_end (GASP_UPC_LOCK_FREE, &ptr);
+}
+
+void
+upc_lock_freeg (upc_shared_ptr_t ptr, const char *filename, int linenum)
+{
+  p_start (GASP_UPC_LOCK_FREE, &ptr);	/* Start of the lock-free event.  */
+  GUPCR_SET_ERR_LOC();			/* Presumably uses filename/linenum.  */
+  upc_lock_free(ptr);			/* The wrapped library call.  */
+  GUPCR_CLEAR_ERR_LOC();
+  p_end (GASP_UPC_LOCK_FREE, &ptr);	/* End of the lock-free event.  */
+}
+
+upc_shared_ptr_t
+upc_all_lock_allocg (const char *filename, int linenum)
+{
+  upc_shared_ptr_t new_lock;	/* Passed to p_end by address.  */
+  p_start (GASP_UPC_ALL_LOCK_ALLOC);
+  GUPCR_SET_ERR_LOC ();		/* Presumably uses filename/linenum.  */
+  new_lock = upc_all_lock_alloc ();	/* The wrapped collective call.  */
+  GUPCR_CLEAR_ERR_LOC ();
+  p_end (GASP_UPC_ALL_LOCK_ALLOC, &new_lock);
+  return new_lock;
+}
+
+void
+upc_lockg (upc_shared_ptr_t ptr, const char *filename, int linenum)
+{
+  p_start (GASP_UPC_LOCK, &ptr);	/* Start of the lock event.  */
+  GUPCR_SET_ERR_LOC();			/* Presumably uses filename/linenum.  */
+  upc_lock(ptr);			/* The wrapped library call.  */
+  GUPCR_CLEAR_ERR_LOC();
+  p_end (GASP_UPC_LOCK, &ptr);		/* End of the lock event.  */
+}
+
+int
+upc_lock_attemptg (upc_shared_ptr_t ptr, const char *filename, int linenum)
+{
+  int got_lock;			/* Result of upc_lock_attempt.  */
+  p_start (GASP_UPC_LOCK_ATTEMPT, &ptr);
+  GUPCR_SET_ERR_LOC ();		/* Presumably uses filename/linenum.  */
+  got_lock = upc_lock_attempt (ptr);
+  GUPCR_CLEAR_ERR_LOC ();
+  p_end (GASP_UPC_LOCK_ATTEMPT, &ptr, got_lock);	/* Outcome is reported.  */
+  return got_lock;
+}
+
+void
+upc_unlockg (upc_shared_ptr_t ptr, const char *filename, int linenum)
+{
+  p_start (GASP_UPC_UNLOCK, &ptr);	/* Start of the unlock event.  */
+  GUPCR_SET_ERR_LOC();			/* Presumably uses filename/linenum.  */
+  upc_unlock(ptr);			/* The wrapped library call.  */
+  GUPCR_CLEAR_ERR_LOC();
+  p_end (GASP_UPC_UNLOCK, &ptr);	/* End of the unlock event.  */
+}
+
+void
+__upc_funcg (int start, const char *funcname,
+             const char *filename, const int linenum)
+{
+  if (start)	/* Nonzero selects the start event, zero the end event.  */
+    p_start (GASP_C_FUNC, funcname);
+  else
+    p_end (GASP_C_FUNC, funcname);
+}
+
+void
+__upc_forallg (int start, const char *filename, const int linenum)
+{
+  if (start)	/* Nonzero selects the start event, zero the end event.  */
+    p_start (GASP_UPC_FORALL);
+  else
+    p_end (GASP_UPC_FORALL);
+}
+
+extern void __upc_exitg (int status, const char *filename, int linenum)
+             __attribute__ ((__noreturn__));
+
+void
+__upc_exitg (int status, const char *filename, int linenum)
+{
+  p_start (GASP_UPC_COLLECTIVE_EXIT, status);
+  p_end (GASP_UPC_COLLECTIVE_EXIT, status);	/* Both events precede the exit.  */
+  __upc_exit (status);	/* Presumably never returns (wrapper is noreturn).  */
+}
+
+void
+upc_global_exitg (int status, const char *filename, int linenum)
+{
+  p_atomic (GASP_UPC_NONCOLLECTIVE_EXIT, status);	/* Single event, no pair.  */
+  upc_global_exit (status);	/* The wrapped library call.  */
+}
+
+void *
+__cvtaddrg (upc_shared_ptr_t p, const char *filename, int linenum)
+{
+  void *addr;			/* Whatever __cvtaddr yields for P.  */
+  GUPCR_SET_ERR_LOC ();		/* Presumably uses filename/linenum.  */
+  addr = __cvtaddr (p);
+  GUPCR_CLEAR_ERR_LOC ();
+  return addr;
+}
+
+void *
+__getaddrg (upc_shared_ptr_t p, const char *filename, int linenum)
+{
+  void *addr;			/* Whatever __getaddr yields for P.  */
+  GUPCR_SET_ERR_LOC ();		/* Presumably uses filename/linenum.  */
+  addr = __getaddr (p);
+  GUPCR_CLEAR_ERR_LOC ();
+  return addr;
+}
+
+size_t
+upc_addrfieldg (upc_shared_ptr_t p, const char *filename, int linenum)
+{
+  size_t addr_field;		/* Value returned by upc_addrfield.  */
+  GUPCR_SET_ERR_LOC ();		/* Presumably uses filename/linenum.  */
+  addr_field = upc_addrfield (p);
+  GUPCR_CLEAR_ERR_LOC ();
+  return addr_field;
+}
+
+size_t
+upc_affinitysizeg (size_t totalsize, size_t nbytes, size_t threadid,
+                   const char *filename, int linenum)
+{
+  size_t affinity_size;		/* Value returned by upc_affinitysize.  */
+  GUPCR_SET_ERR_LOC ();		/* Presumably uses filename/linenum.  */
+  affinity_size = upc_affinitysize (totalsize, nbytes, threadid);
+  GUPCR_CLEAR_ERR_LOC ();
+  return affinity_size;
+}
+
+size_t
+upc_phaseofg (upc_shared_ptr_t p, const char *filename, int linenum)
+{
+  size_t phase;			/* Value returned by upc_phaseof.  */
+  GUPCR_SET_ERR_LOC ();		/* Presumably uses filename/linenum.  */
+  phase = upc_phaseof (p);
+  GUPCR_CLEAR_ERR_LOC ();
+  return phase;
+}
+
+upc_shared_ptr_t
+upc_resetphaseg (upc_shared_ptr_t p, const char *filename, int linenum)
+{
+  upc_shared_ptr_t reset_ptr;	/* Value returned by upc_resetphase.  */
+  GUPCR_SET_ERR_LOC ();		/* Presumably uses filename/linenum.  */
+  reset_ptr = upc_resetphase (p);
+  GUPCR_CLEAR_ERR_LOC ();
+  return reset_ptr;
+}
+
+size_t
+upc_threadofg (upc_shared_ptr_t p, const char *filename, int linenum)
+{
+  size_t thread;		/* Value returned by upc_threadof.  */
+  GUPCR_SET_ERR_LOC ();		/* Presumably uses filename/linenum.  */
+  thread = upc_threadof (p);
+  GUPCR_CLEAR_ERR_LOC ();
+  return thread;
+}
Index: libgupc/smp/upc_lock.h
===================================================================
--- libgupc/smp/upc_lock.h	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_lock.h	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,31 @@
+/* Copyright (C) 2010-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime Library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef _UPC_LOCK_H_
+#define _UPC_LOCK_H_
+
+extern void __upc_lock_init (void);
+#endif /* !_UPC_LOCK_H_ */
Index: libgupc/smp/upc_lock.upc
===================================================================
--- libgupc/smp/upc_lock.upc	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_lock.upc	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,419 @@
+/* Copyright (C) 2010-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime Library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#include <upc.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include "config.h"
+#include "upc_lock_sup.h"
+
+/* UPC lock implementation.
+
+   The UPC lock functions use MCS locks as described in the
+   Mellor-Crummey and Scott paper: "Algorithms for Scalable Synchronization
+   on Shared-Memory Multiprocessors", ACM Transactions on Computer Systems,
+   February 1991.
+
+   The following data structures are used in this implementation:
+
+   * Lock link block
+     A data structure in the shared address space used to link threads
+     waiting on the lock.  Each thread inserts itself on the list with
+     a link block that has affinity to the thread itself. It has
+     three fields: (1) next - link list pointer, (2) signal - notification
+     of lock ownership transfer, (3) free flag - link block was freed by
+     some other thread. A link block has affinity to the owner of the block.
+   * Lock Link block reference
+     A 64-bit container for a pointer-to-shared that contains only a thread
+     number and address field.  Link references allow the lock routines to
+     efficiently execute atomic operations on shared pointers.
+   * Lock structure (upc_lock_t)
+     A lock is a data structure living in the shared memory space.
+     Contains two lock link references: (1) last - a lock link reference
+     to the link block of the last thread on the lock waiting list,
+     (2) owner - link reference to the lock's owner link block. The lock
+     data structure has affinity of the thread that created the lock.
+
+   The lock data structure goes through the following states:
+
+   * Lock is free
+     Last link block reference is NULL.
+   * Lock is taken
+     Last link block reference points to the last thread's link block on
+     the waiting queue.  Owner link reference points to the owner thread's
+     link block.  Only the owner of the lock is allowed to manipulate the
+     owner's link reference in the lock data structure.
+
+   The following operations are performed on the lock data structure:
+
+   * Lock acquire
+     Thread allocates a link block and writes its reference to the 'last'
+     field of the lock data structure by performing an atomic SWAP
+     operation.  If returned value is NULL, thread is the owner of the
+     lock.  Otherwise a link reference to the last thread on the waiting
+     queue is returned and thread needs to link itself on the waiting
+     queue.
+   * Lock release
+     Attempt to write a NULL link block reference into the lock's 'last'
+     field with atomic CSWAP operation.  If successful, lock is released.
+     Otherwise, the ownership of the lock must be passed to the first
+     thread on the wait queue.
+   * Lock allocation/free
+     Lock is allocated from the shared memory space or from the local
+     free list.  They are freed by placing the lock data structure on
+     the local lock free list if the lock has affinity of the thread that
+     releases it.  Otherwise lock's memory is released.
+*/
+
+struct upc_lock_link_cache_struct
+{
+  upc_lock_link_t lock_links[GUPCR_MAX_LOCKS];	/* Fixed-size link pool.  */
+};
+typedef struct upc_lock_link_cache_struct upc_lock_link_cache_t;
+
+/* Array of lock links managed as a per-thread free list.  */
+static shared upc_lock_link_cache_t upc_lock_link_cache[THREADS];
+
+/* Per thread lock link free list.  */
+static upc_lock_link_t *upc_lock_links;
+/* Null link block reference.  Used for CSWAP operations.  */
+upc_link_ref null_link = {.atomic = 0 };
+
+/* UPC lock free list.  */
+static upc_lock_t *lock_free;
+
+/* Memory allocation support.  */
+upc_lock_t *shared __upc_all_lock;	/* Set by thread 0 in upc_all_lock_alloc.  */
+shared upc_lock_t __upc_alloc_lock;	/* Heap manager lock; see __upc_lock_init.  */
+
+__attribute__ ((__always_inline__))
+static inline
+void
+upc_new_lock_init (upc_lock_t *lock)
+{
+  lock->last.atomic = 0;	/* Null 'last': the lock is free.  */
+  lock->owner_link.atomic = 0;	/* No owner link block recorded.  */
+}
+
+/* Lock link block utilities.  */
+
+/* Lock link block is a data structure that links
+   lock waiting threads.  It is located in the
+   shared space of the thread waiting for the lock.
+   They are locally managed with a free list rooted
+   at 'upc_lock_links'.
+
+   NOTE:
+   The current design for memory allocation uses
+   UPC locks in alloc/free routines.  Thus, link blocks
+   cannot be allocated with the UPC memory allocation
+   routines.  */
+
+/* Initialize lock link block free list.
+   NOTE: Link with local addresses for faster access.  */
+static void
+upc_lock_link_init (void)
+{
+  shared [] upc_lock_link_t *slink = upc_lock_link_cache[MYTHREAD].lock_links;
+  upc_lock_link_t *link = (upc_lock_link_t *) slink;	/* Local view.  */
+  upc_lock_links = link;	/* Free list starts at the first block.  */
+  memset (link, '\0', sizeof (upc_lock_link_cache_t));
+  for (int i = 0; i < (GUPCR_MAX_LOCKS - 1); i++)
+    {
+      link[i].link_ref = upc_to_link_ref (slink++);	/* Reference to itself.  */
+      link[i].link = &link[i + 1];	/* Chain to the next block.  */
+    }
+  link[GUPCR_MAX_LOCKS - 1].link_ref = upc_to_link_ref (slink++); /* Tail.  */
+}
+
+/* Reset LINK and push it onto this thread's lock link free list.  */
+__attribute__ ((__always_inline__))
+static inline
+void
+upc_lock_link_free (upc_lock_link_t * link)
+{
+  SET_NULL_LOCK_REF (link->next);	/* No successor on a wait queue.  */
+  link->signal = 0;			/* Clear the ownership hand-off flag.  */
+  link->link = upc_lock_links;		/* Push on the head of the list.  */
+  upc_lock_links = link;
+}
+
+/* Pop a link block off this thread's free list; fatal if none is left.  */
+__attribute__ ((__always_inline__))
+static inline
+upc_lock_link_t *
+upc_lock_link_alloc (void)
+{
+  upc_lock_link_t *link = upc_lock_links;
+  if (!link)
+    {
+      /* Free list is empty.  Reclaim every link block that some other
+         thread flagged as free; scan the whole pool, last entry included.  */
+      upc_lock_link_t *llink = (upc_lock_link_t *)
+	upc_lock_link_cache[MYTHREAD].lock_links;
+      for (int i = 0; i < GUPCR_MAX_LOCKS; ++i)
+	{
+	  if (llink->free)
+	    {
+	      llink->free = 0;
+	      upc_lock_link_free (llink);
+	    }
+	  llink++;
+	}
+      link = upc_lock_links;
+      if (!link)
+	__upc_fatal ("Cannot allocate a UPC lock link. "
+		"The number of allocated per thread lock links "
+		"exceeds the configuration defined maximum of entries.");
+    }
+  upc_lock_links = link->link;
+  return link;
+}
+
+/* Non-collective lock allocation.  Reuse a lock from this thread's
+   free list when one is available; otherwise allocate a new one.  */
+upc_lock_t *
+upc_global_lock_alloc (void)
+{
+  upc_lock_t *new_lock = lock_free;
+  if (new_lock == NULL)
+    {
+      /* Free list is empty: allocate space for the lock from shared
+         memory with affinity to the calling thread.  */
+      new_lock = upc_alloc (sizeof (upc_lock_t));
+      if (new_lock == NULL)
+	__upc_fatal ("Cannot allocate memory for the lock");
+    }
+  else
+    {
+      /* Unlink the head of the local free list.  */
+      lock_free = new_lock->free_link;
+    }
+  upc_new_lock_init (new_lock);
+  return new_lock;
+}
+
+/* Free all lock resources.
+   If lock has affinity to the calling thread it is released on the
+   local free list; otherwise its memory is returned with 'upc_free'.
+   If 'lock' is a null pointer, no action occurs.  If the argument does
+   not match a pointer earlier returned by an alloc function, or if the
+   lock was already de-allocated by 'upc_lock_free', behavior is undefined.  */
+
+void
+upc_lock_free (upc_lock_t *lock)
+{
+  upc_link_ref owner;
+  if (lock == NULL)
+    return;
+  /* Release the owner's link block, if the lock records one.  */
+  owner = lock->owner_link;
+  if (!NULL_LOCK_REF (owner))
+    {
+      shared upc_lock_link_t *link = upc_from_link_ref (owner);
+      if (MYTHREAD == (int) upc_threadof (link))
+	{
+	  upc_lock_link_free ((upc_lock_link_t *) link);
+	}
+      else
+	link->free = 1;	/* Flag it; upc_lock_link_alloc reclaims it later.  */
+    }
+  if (MYTHREAD == (int) upc_threadof (lock))
+    {
+      /* Release it on the local free list.  */
+      lock->free_link = lock_free;
+      lock_free = lock;
+    }
+  else
+    upc_free (lock);
+}
+
+/* Collective free of all lock resources; ends with a barrier.  */
+void
+upc_all_lock_free (upc_lock_t *lock)
+{
+  upc_link_ref owner;
+  if (lock == NULL)
+    return;	/* Note: returns before the barrier below.  */
+  /* Release the link block if this thread owns the lock.  */
+  owner = lock->owner_link;
+  if (!NULL_LOCK_REF (owner))
+    {
+      shared upc_lock_link_t *link = upc_from_link_ref (owner);
+      if (MYTHREAD == (int) upc_threadof (link))
+	{
+	  upc_lock_link_free ((upc_lock_link_t *) link);
+	}
+    }
+  if (MYTHREAD == (int) upc_threadof (lock))
+    {
+      /* Release it on the local free list.  */
+      lock->free_link = lock_free;
+      lock_free = lock;
+    }
+  upc_barrier;	/* No thread leaves before all have done their part.  */
+}
+
+/* Collective lock allocation.  Thread 0 obtains and initializes the
+   lock; every thread returns the pointer published in __upc_all_lock.  */
+upc_lock_t *
+upc_all_lock_alloc (void)
+{
+  upc_lock_t *new_lock;
+  upc_barrier (-1);
+  if (MYTHREAD == 0)
+    {
+      new_lock = lock_free;
+      if (new_lock == NULL)
+	{
+	  new_lock = upc_alloc (sizeof (upc_lock_t));
+	  if (new_lock == NULL)
+	    __upc_fatal ("Cannot allocate memory for the lock");
+	}
+      else
+	{
+	  lock_free = new_lock->free_link;
+	}
+      upc_new_lock_init (new_lock);
+      __upc_all_lock = new_lock;
+    }
+  upc_barrier (-1);
+  return __upc_all_lock;
+}
+
+/* UPC lock acquire.  Returns once the calling thread owns LOCK.  */
+void
+upc_lock (upc_lock_t *lock)
+{
+  upc_lock_link_t *link;
+  upc_link_ref old_link_ref;
+  link = upc_lock_link_alloc ();
+
+  /* Insert this thread on the waiting list.  */
+  upc_link_ref_swap (&lock->last, &old_link_ref, link->link_ref);
+  if (!NULL_LOCK_REF (old_link_ref))
+    {
+      /* We have to wait.  "old_link_ref" contains a reference
+         to the last thread on the wait queue.  */
+      shared upc_lock_link_t *rmt_link = upc_from_link_ref (old_link_ref);
+      upc_link_ref_put ((shared upc_link_ref *) &rmt_link->next,
+			link->link_ref);
+      /* Wait for lock ownership notification.  */
+      __upc_spin_until (link->signal);
+    }
+  lock->owner_link = link->link_ref;	/* Read back by upc_unlock.  */
+  upc_fence;
+}
+
+/* UPC lock acquire attempt.
+   Return 1 if lock is acquired, 0 otherwise.  */
+int
+upc_lock_attempt (upc_lock_t *lock)
+{
+  upc_lock_link_t *link;
+  int compare_ok;
+  /* No need to go further if lock is unavailable.  */
+  if (!NULL_LOCK_REF (upc_link_ref_last (&lock->last)))
+    return 0;
+  /* Try to allocate the lock.  */
+  link = upc_lock_link_alloc ();
+  compare_ok = upc_link_ref_cswap (&lock->last, null_link, link->link_ref);
+  if (compare_ok)
+    {
+      lock->owner_link = link->link_ref;
+      upc_fence;
+    }
+  else
+    {
+      upc_lock_link_free (link);	/* CSWAP failed; return the block.  */
+    }
+  return compare_ok;
+}
+
+/* UPC lock release.  Fatal if LOCK is null or records no owner link.  */
+void
+upc_unlock (upc_lock_t *lock)
+{
+  upc_lock_link_t *link;
+  upc_link_ref link_ref;	/* Read only after the null check below.  */
+  int compare_ok;
+
+  if (!lock)
+    __upc_fatal ("Trying to release a NULL lock");
+  link_ref = lock->owner_link;
+  if (NULL_LOCK_REF (link_ref))
+    __upc_fatal ("Trying to release a lock that is not locked");
+  upc_fence;
+  link = (upc_lock_link_t *) upc_from_link_ref (link_ref);
+
+  /* Try to release the lock by trying to write a NULL into lock
+     block (last).  Use CSWAP with link_ref as expected.  */
+  compare_ok = upc_link_ref_cswap (&lock->last, link_ref, null_link);
+  if (!compare_ok)
+    {
+      /* Another thread is already waiting for the lock; pass the
+         ownership.  First make sure that the waiting thread has
+         completed its insertion on the waiting list.  */
+      __upc_spin_until (!NULL_LOCK_REF (upc_link_ref_get (&link->next)));
+      /* Notify the waiting thread that it now owns the lock.  */
+      {
+	shared upc_lock_link_t *rmt_link;
+	rmt_link = upc_from_link_ref (link->next);
+	rmt_link->signal = 1;
+      }
+    }
+  upc_lock_link_free (link);
+}
+
+/* Heap manager lock support.  */
+
+void
+__upc_acquire_alloc_lock (void)
+{
+  upc_lock (&__upc_alloc_lock);	/* Returns once the heap lock is owned.  */
+}
+
+void
+__upc_release_alloc_lock (void)
+{
+  upc_unlock (&__upc_alloc_lock);	/* Counterpart of the acquire call.  */
+}
+
+/* Initialize this thread's UPC lock resources.  */
+void
+__upc_lock_init (void)
+{
+  upc_lock_link_init ();	/* Build the link block free list.  */
+  lock_free = NULL;		/* No recycled locks yet.  */
+
+  /* Heap manager lock must be manually initialized.  */
+  if (!MYTHREAD)		/* Thread 0 only.  */
+    upc_new_lock_init (&__upc_alloc_lock);
+}
+
+/** @} */
Index: libgupc/smp/upc_lock_sup.c
===================================================================
--- libgupc/smp/upc_lock_sup.c	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_lock_sup.c	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,49 @@
+/* Copyright (C) 2010-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime Library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#include "upc_config.h"
+#include "upc_sysdep.h"
+#include "upc_defs.h"
+
+/* On some 32 bit targets 64 bit CSWAP is not available.  */
+#if !defined (__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8)
+/* Lock-based fallback for the 8-byte compare-and-swap: use the OS spin
+   lock in __upc_info to protect the critical section.  Returns 1 if
+   *ADDR equaled EXP and was replaced by VAL, 0 otherwise.  */
+unsigned char
+__sync_bool_compare_and_swap_8 (long long *addr, long long exp, long long val)
+{
+  upc_info_p u = __upc_info;
+  unsigned char swapped;
+  __upc_acquire_lock (&u->lock);
+  swapped = (*addr == exp);
+  if (swapped)
+    *addr = val;
+  __upc_release_lock (&u->lock);
+  return swapped;
+}
+#endif
+ 
Index: libgupc/smp/upc_lock_sup.h
===================================================================
--- libgupc/smp/upc_lock_sup.h	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_lock_sup.h	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,226 @@
+/* Copyright (C) 2010-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime Library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef _UPC_LOCK_SUP_H_
+#define _UPC_LOCK_SUP_H_
+
+/* GUPC lock implementation support routines.  */
+
+/* Lock reference atomic type definitions: the C integer types used
+   for the thread field, the address field, and the lock reference
+   word as a whole.  */
+#define GUPCR_LOCK_ATOMIC_THREAD_TYPE unsigned long
+#define GUPCR_LOCK_ATOMIC_VADDR_TYPE unsigned long long
+#define GUPCR_LOCK_ATOMIC_TYPE unsigned long long
+/* Stores and loads of lock reference values must be atomic.  On
+   32-bit targets (4-byte pointers) GUPCR_ATOMIC_LOCK_REF_ACCESS is
+   set so that the accessors below use the atomic functions.  */
+#if __SIZEOF_POINTER__ == 4
+#define GUPCR_ATOMIC_LOCK_REF_ACCESS 1
+#else
+#define GUPCR_ATOMIC_LOCK_REF_ACCESS 0
+#endif
+/* A lock reference is a 64-bit value.  If the pointer-to-shared
+   representation is bigger (struct implementation on 64-bit
+   targets), it must be converted to and from the lock reference.  */
+#if defined (GUPCR_PTS_STRUCT_REP) && __SIZEOF_POINTER__ == 8
+#define GUPCR_CONVERT_LOCK_REF 1
+#else
+#define GUPCR_CONVERT_LOCK_REF 0
+#endif
+
+/* Lock link reference pointer.
+   A "shared" pointer holding only a thread and an offset, packed
+   into 64 bits.  Small enough that the runtime can read/write it
+   without the need to have the exclusive rights to the memory
+   (at least for packed/64bits).  */
+struct upc_lock_thread_struct
+  {
+    GUPCR_LOCK_ATOMIC_THREAD_TYPE thread:GUPCR_THREAD_SIZE;
+    GUPCR_LOCK_ATOMIC_VADDR_TYPE addr:64-GUPCR_THREAD_SIZE;
+  };
+/* The same 64 bits viewed either as thread/address fields or as a
+   single word usable with the atomic builtins.  */
+union upc_lock_link_ptr
+  {
+    struct upc_lock_thread_struct sptr;
+    GUPCR_LOCK_ATOMIC_TYPE atomic;
+  };
+typedef union upc_lock_link_ptr upc_link_ref;
+/* Accessors for link references.  The arguments are parenthesized so
+   that any expression of type upc_link_ref (for example '*p') can be
+   passed, not just a plain identifier.  */
+#define NULL_LOCK_REF(P) ((P).atomic == 0)
+#define SET_NULL_LOCK_REF(P) ((P).atomic = 0)
+#define LOCK_REF_THREAD(P) ((P).sptr.thread)
+#define SAME_LOCK_REF(P,V) ((P).atomic == (V).atomic)
+
+typedef struct upc_lock_link_struct upc_lock_link_t;
+
+/* upc_lock_t is an opaque shared type.  The 'upc_lock_struct'
+   structure describes the internal representation of the
+   UPC lock type.
+
+   UPC lock implementation uses builtin atomic functions
+   for swap/cswap of the UPC shared pointer.
+
+   The structure is aligned on a 64-byte boundary.  */
+
+struct upc_lock_struct
+{
+  upc_link_ref last;		/* Last thread on the waiting list.  */
+  upc_link_ref owner_link;	/* Lock owner link block pointer.  */
+  upc_lock_t *free_link;	/* NOTE(review): presumably chains unused
+				   locks on a free list -- confirm.  */
+} __attribute__ ((aligned(64)));
+
+/* Lock link block: one entry on a lock's waiting list.  The
+   structure is aligned on a 64-byte boundary.  */
+struct upc_lock_link_struct
+{
+  upc_link_ref next;		  /* Next thread on the waiting list.  */
+  int signal;			  /* Notification of lock ownership.  */
+  int free;			  /* Indication that link block is not used.  */
+  upc_link_ref link_ref;	  /* Lock reference of this block.  */
+  upc_lock_link_t *link;	  /* Free list link pointer.  */
+} __attribute__ ((aligned(64)));
+
+/* Overlay of a UPC pointer-to-shared with its C representation
+   (upc_shared_ptr_t), giving access to the representation of a
+   'shared void *' value.  */
+typedef union pts_as_rep
+  {
+    shared void *pts;
+    upc_shared_ptr_t rep;
+  } pts_as_rep_t;
+
+/* Build a link reference from the pointer-to-shared P.  */
+__attribute__((__always_inline__))
+static inline
+upc_link_ref
+upc_to_link_ref (shared void *p)
+{
+#if GUPCR_CONVERT_LOCK_REF
+  /* The pointer-to-shared does not fit the link reference as is:
+     copy over its thread and virtual address fields.  */
+  union shared_as_rep
+    {
+      shared void *as_shared;
+      upc_shared_ptr_t as_rep;
+    } cvt = { .as_shared = p };
+  upc_link_ref link;
+  link.sptr.thread = GUPCR_PTS_THREAD (cvt.as_rep);
+  link.sptr.addr = GUPCR_PTS_VADDR (cvt.as_rep);
+  return link;
+#else
+  /* Reinterpret the pointer-to-shared as a link reference.  */
+  union shared_as_link
+    {
+      shared void *as_shared;
+      upc_link_ref as_link;
+    } cvt = { .as_shared = p };
+  return cvt.as_link;
+#endif
+}
+
+/* Rebuild a pointer-to-shared (to a lock link block) from the link
+   reference VAL.  */
+__attribute__((__always_inline__))
+static inline
+shared upc_lock_link_t *
+upc_from_link_ref (upc_link_ref val)
+{
+#if GUPCR_CONVERT_LOCK_REF
+  union link_as_rep
+    {
+      shared upc_lock_link_t *as_shared;
+      upc_shared_ptr_t as_rep;
+    } cvt;
+  /* Start from a null pointer-to-shared, then fill in the address
+     and thread fields carried by the link reference.  */
+  GUPCR_PTS_SET_NULL_SHARED (cvt.as_rep);
+  GUPCR_PTS_SET_VADDR (cvt.as_rep,
+		       (GUPCR_LOCK_ATOMIC_VADDR_TYPE)val.sptr.addr);
+  GUPCR_PTS_SET_THREAD (cvt.as_rep, val.sptr.thread);
+  return cvt.as_shared;
+#else
+  /* Reinterpret the link reference as a pointer-to-shared.  */
+  union link_as_shared
+    {
+      shared upc_lock_link_t *as_shared;
+      upc_link_ref as_link;
+    } cvt = { .as_link = val };
+  return cvt.as_shared;
+#endif
+}
+
+/* Atomically replace the link reference located at P with VAL and
+   return the value it held before in *OLD.  Implemented as a
+   compare-and-swap retry loop.  */
+__attribute__((__always_inline__))
+static inline
+void
+upc_link_ref_swap (shared void *p, upc_link_ref *old, upc_link_ref val)
+{
+  upc_link_ref *target = __upc_map_to_local (p);
+  for (;;)
+    {
+      /* Snapshot the current value; retry if it changed before the
+         compare-and-swap.  */
+      *old = *target;
+      if (__sync_bool_compare_and_swap ((GUPCR_LOCK_ATOMIC_TYPE *) target,
+					old->atomic, val.atomic))
+	break;
+    }
+}
+
+/* Atomically store VAL into the link reference located at P if it
+   currently equals CMP.  Returns non-zero if the store was done.  */
+__attribute__((__always_inline__))
+static inline
+int
+upc_link_ref_cswap (shared void *p, upc_link_ref cmp, upc_link_ref val)
+{
+  upc_link_ref *target = __upc_map_to_local (p);
+  GUPCR_LOCK_ATOMIC_TYPE *word = (GUPCR_LOCK_ATOMIC_TYPE *) target;
+  return __sync_bool_compare_and_swap (word, cmp.atomic, val.atomic);
+}
+
+/* Store the link reference VAL at the shared location P.  When
+   GUPCR_ATOMIC_LOCK_REF_ACCESS is set the store is made with a
+   compare-and-swap retry loop, otherwise with a plain assignment.  */
+__attribute__((__always_inline__))
+static inline
+void
+upc_link_ref_put (shared upc_link_ref *p, upc_link_ref val)
+{
+  GUPCR_LOCK_ATOMIC_TYPE *word = __upc_map_to_local(p);
+#if GUPCR_ATOMIC_LOCK_REF_ACCESS
+  for (;;)
+    {
+      const GUPCR_LOCK_ATOMIC_TYPE seen = *word;
+      if (__sync_bool_compare_and_swap (word, seen, val.atomic))
+	break;
+    }
+#else
+  *word = val.atomic;
+#endif
+}
+
+/* Load and return the link reference stored at P.  When
+   GUPCR_ATOMIC_LOCK_REF_ACCESS is set, the value is read and then
+   confirmed with a compare-and-swap of the value against itself,
+   retrying until the swap succeeds; otherwise a plain load is
+   used.  */
+__attribute__((__always_inline__))
+static inline
+upc_link_ref
+upc_link_ref_get (upc_link_ref *p)
+{
+#if GUPCR_ATOMIC_LOCK_REF_ACCESS
+  GUPCR_LOCK_ATOMIC_TYPE *addr = (GUPCR_LOCK_ATOMIC_TYPE *) p;
+  GUPCR_LOCK_ATOMIC_TYPE tmp;
+  upc_link_ref ref;
+  do
+    {
+      tmp = *addr;
+    } while (!__sync_bool_compare_and_swap (addr, tmp, tmp));
+  /* Assign through the union member rather than relying on the
+     GNU cast-to-union extension.  */
+  ref.atomic = tmp;
+  return ref;
+#else
+  return *p;
+#endif
+}
+
+/* Return the link reference stored at the shared location P, after
+   mapping P to a local address.  */
+__attribute__((__always_inline__))
+static inline
+upc_link_ref
+upc_link_ref_last (shared void *p)
+{
+  upc_link_ref *local = __upc_map_to_local (p);
+  const upc_link_ref current = upc_link_ref_get (local);
+  return current;
+}
+
+#endif /* !_UPC_LOCK_SUP_H_ */
Index: libgupc/smp/upc_main.c
===================================================================
--- libgupc/smp/upc_main.c	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_main.c	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,1176 @@
+/* Copyright (C) 2001-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime Library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#include <stdarg.h>
+#include "upc_config.h"
+#include "upc_sysdep.h"
+#include "upc_defs.h"
+#include "upc_lock.h"
+#include "upc_sup.h"
+#include "upc_sync.h"
+#include "upc_affinity.h"
+#include "upc_numa.h"
+#include "upc_debug.h"
+#include "gasp_upc.h"
+#include "upc_pupc.h"
+#if HAVE_UPC_BACKTRACE
+#include "upc_backtrace.h"
+#endif
+
+/* Entry point of the user's main program.  */
+extern int GUPCR_MAIN (int argc, char *argv[]);
+
+/* The number of THREADS, as specified on the command line.
+   Initialized to -1; a value <= 0 means that no value is known yet.  */
+int THREADS = -1;
+
+#ifdef GUPCR_USE_PTHREADS
+/* The number of pthreads to run per process, as specified on the
+   command line. Required to be equal to the number of UPC threads
+   in the current implementation.  A value of -1 means 'dynamic'
+   (taken from THREADS when the switches are processed).  */
+int UPC_PTHREADS = -1;
+#endif /* GUPCR_USE_PTHREADS */
+
+/* The current thread number (range: 0..THREADS-1) */
+GUPCR_THREAD_LOCAL int MYTHREAD;
+
+/* Depth count used to implement the semantics of
+   nested upc_forall statements.  */
+GUPCR_THREAD_LOCAL int __upc_forall_depth;
+
+/* The UPC page size, in bits.  Don't use 'const' here
+   because we want this to end up in the loaded data where
+   the debug assistant can read it.  Eventually move this
+   to the upc_info structure.  */
+int __upc_page_shift = GUPCR_VM_OFFSET_BITS;
+
+/* Executable program's name */
+static char *__upc_pgm_name;
+
+/* Runtime state information */
+upc_info_p __upc_info;
+
+/* The filename of the location where a runtime
+   error was detected.  This is set by the various
+   debug-enabled ('g') UPC runtime library routines.  */
+GUPCR_THREAD_LOCAL const char *__upc_err_filename;
+
+/* The line number of the location where a runtime
+   error was detected.  This is set by the various
+   debug-enabled ('g') UPC runtime library routines.  */
+GUPCR_THREAD_LOCAL unsigned int __upc_err_linenum;
+
+/* Local host name, and the size of the buffer that holds it.  */
+#define HOST_NAME_LEN 256
+static char host_name[HOST_NAME_LEN];
+
+/* Per-thread initial heap size; may be overridden by the heap
+   size command line switches.  */
+static size_t __upc_init_heap_size = GUPCR_DEFAULT_PER_THREAD_HEAP_SIZE;
+
+/* CPU scheduling policy (set by the -sched-policy switch).  */
+static upc_sched_policy_t __upc_sched_policy = GUPCR_SCHED_POLICY_DEFAULT;
+
+/* CPU memory affinity policy (set by the -mem-policy switch).  */
+static upc_mem_policy_t __upc_mem_policy = GUPCR_MEM_POLICY_DEFAULT;
+
+/* List of CPUs that must be avoided (filled in by the
+   -sched-cpu-avoid switch).  */
+static upc_cpu_avoid_p __upc_cpu_avoid_set;
+
+/* UPC debug on/off.  Turned on by the -g switch or by a non-zero
+   value of the GUM_DEBUG_ENV environment variable.  */
+static int __upc_gum_debug = 0;
+
+/* UPC debug - UDA server shared pointer.
+       Shared pointer type definition must be used, otherwise compiler
+       removes type definition. */
+static upc_dbg_shared_ptr_t dbg_shared_ptr __attribute__ ((__unused__));
+
+/* True if C is one of the size-scaling suffix letters k, m, or g
+   (in either case).  */
+#define IS_MULT_CHAR(c) ((c) == 'k' || (c) == 'K' \
+			 || (c) == 'm' || (c) == 'M' \
+			 || (c) == 'g' || (c) == 'G')
+/* Scale factor selected by the suffix letter C: 1024 for k,
+   1024*1024 for m, 1024*1024*1024 for g, and 1 for anything else.  */
+#define MULT_FACTOR(c) (((c) == 'k' || (c) == 'K') ? 1024 \
+                        : ((c) == 'm' || (c) == 'M') ? 1024*1024 \
+		        : ((c) == 'g' || (c) == 'G') ? 1024*1024*1024 : 1)
+
+/* switches that require extra argument (values) */
+#define SWITCH_TAKES_ARG(STR) \
+  (!strcmp (STR, "-n") || !strcmp(STR, "-heap") \
+  || !strcmp (STR, "-sched-policy") \
+  || !strcmp (STR, "-mem-policy") \
+  || !strcmp (STR, "-sched-cpu-avoid"))
+
+/* Parse the unsigned decimal integer in STR.
+
+   If ACCEPT_MULTIPLIER is non-zero, the digits may be followed by a
+   single scale letter (k, m, or g, either case), optionally followed
+   by 'b' or 'B'; the value is then multiplied by the matching
+   factor.  Nothing else may follow the digits.
+
+   Returns 1 and stores the (scaled) value in *VAL if it lies in the
+   range LOW..HIGH.  Returns 0 with *VAL set to 0 if the string is
+   malformed, or if the value is out of range or not representable.  */
+static int
+__upc_get_int_value (const char *str, long int *val,
+		     int accept_multiplier, long int low, long int high)
+{
+  const char *s = str;
+  long int factor = 1;
+  long int v;
+  *val = 0;
+  /* <ctype.h> functions require a value representable as unsigned
+     char; a plain (possibly negative) char is not acceptable.  */
+  while (isdigit ((unsigned char) *s))
+    ++s;
+  if (*s)
+    {
+      /* Only "<letter>" or "<letter>b" may follow, with nothing
+         after it.  */
+      if (accept_multiplier && IS_MULT_CHAR (s[0])
+	  && (!s[1] || ((s[1] == 'b' || s[1] == 'B') && !s[2])))
+	factor = MULT_FACTOR (s[0]);
+      else
+	return 0;
+    }
+  /* Unlike atol, strtol has defined behavior when the digits do not
+     fit in a long: it sets errno to ERANGE.  */
+  errno = 0;
+  v = strtol (str, NULL, 10);
+  if (errno == ERANGE)
+    return 0;
+  /* V is non-negative here, because only digits were accepted.
+     Reject values whose scaled result would exceed HIGH before
+     multiplying, so that the multiplication cannot overflow.  */
+  if (high < 0 || v > high / factor)
+    return 0;
+  v *= factor;
+  if (v < low || v > high)
+    return 0;
+  *val = v;
+  return 1;
+}
+
+/* Get list of CPUs to exclude from scheduling on (n1,n2,...).
+
+   STR is a comma-separated list of decimal CPU numbers, each in the
+   range 0..(__upc_num_cpus - 1) and at most 9 digits long.  Every
+   number is recorded in AVOID via __upc_affinity_cpu_avoid_set.
+   Returns 1 if the whole string was accepted, 0 on the first
+   malformed or out-of-range entry (entries before it have already
+   been recorded).  */
+static int
+__upc_get_cpu_avoid_values (const char *str, const upc_cpu_avoid_p avoid)
+{
+  const char *s = str;
+  char digits[10];
+  char *const last_digit = &digits[sizeof (digits) - 2];
+  const long int max_cpus = (__upc_num_cpus - 1);
+  while (*s)
+    {
+      char *d = digits;
+      long int v;
+      int status;
+      /* A number has to begin with a valid digit.  The cast is
+         needed because <ctype.h> functions must not be given a
+         negative char value.  */
+      if (!isdigit ((unsigned char) *s))
+	return 0;
+      while (isdigit ((unsigned char) *s) && d <= last_digit)
+	*d++ = *s++;
+      /* Too many digits.  */
+      if (isdigit ((unsigned char) *s))
+	return 0;
+      *d = '\0';
+      status = __upc_get_int_value (digits, &v, 0, 0, max_cpus);
+      if (!status)
+	return 0;
+      __upc_affinity_cpu_avoid_set (v, avoid);
+      if (*s == ',')
+	{
+	  ++s;
+	  /* Something has to follow the comma.  */
+	  if (!*s)
+	    return 0;
+	}
+    }
+  return 1;
+}
+
+/* Print the usage summary of the UPC runtime switches on stderr,
+   using PGM as the program name, then exit with status 2.
+   This function does not return.  */
+static void
+__upc_print_help_and_exit (char *pgm)
+{
+  fprintf (stderr, "usage: %s [UPC switches] ...\n", pgm);
+  fprintf (stderr, "where the possible UPC switches are:\n");
+  fprintf (stderr,
+	   "	-fupc-threads=N or -n N			N is number of threads to run\n");
+  fprintf (stderr,
+	   "						(N must be in the range 1..%d)\n",
+	   GUPCR_THREADS_MAX);
+  fprintf (stderr,
+	   "	-fupc-heap=N or -heap N			N is the maximum per-thread memory\n");
+  fprintf (stderr,
+	   "						allocation heap size\n");
+  fprintf (stderr,
+	   "						The value of N may be followed\n");
+  fprintf (stderr,
+	   "						by a scale factor of K, M, or G\n");
+  fprintf (stderr,
+	   "						(N must be in the range 1..%ld)\n",
+	   GUPCR_MAX_HEAP_SIZE);
+  fprintf (stderr,
+	   "	-sched-policy [cpu,strict,node,auto] 	UPC scheduling policy\n");
+  fprintf (stderr,
+	   "	 					  cpu - bind to CPU\n");
+  fprintf (stderr,
+	   "	 					  strict - bind to CPU (one thread per CPU)\n");
+  fprintf (stderr,
+	   "	                                      	  node - bind to node (if NUMA available)\n");
+  fprintf (stderr,
+	   "	                                      	  auto - let kernel schedule\n");
+  fprintf (stderr,
+	   "	-sched-cpu-avoid n1,n2,.. 		List of CPUs to avoid scheduling on\n");
+  fprintf (stderr,
+	   "	  					  0 to max CPUs\n");
+  fprintf (stderr,
+	   "	-mem-policy [node,strict,auto]		UPC memory allocation policy\n");
+  fprintf (stderr,
+	   "	 				 	  node - allocate on local node first\n");
+  fprintf (stderr,
+	   "	                                  	  strict - only allocate on local node\n");
+  fprintf (stderr,
+	   "	-g                                  	Turn on UPC source code debugging\n");
+
+  /* Exit status 2 is also used for the other command line errors
+     reported in this file.  */
+  exit (2);
+}
+
+/* Remove argv[1] from the argument vector: decrement *ARGC and move
+   every following entry down by one position.  Does nothing if
+   there is no argument after the program name.
+
+   ARGV must be terminated by a null pointer at index *ARGC (as the
+   argument vector passed to main is); the terminator is moved down
+   as well, so that argv[*argc] is still a null pointer afterwards.  */
+static void
+__upc_shift_args (int *argc, char *argv[])
+{
+  if (*argc > 1)
+    {
+      int i;
+      --*argc;
+      /* Run through index *argc inclusive: the last copy moves the
+         terminating null pointer into its new slot.  */
+      for (i = 1; i <= *argc; ++i)
+	argv[i] = argv[i + 1];
+    }
+}
+
+/* Process the UPC runtime switches at the front of the argument
+   vector, removing each recognized switch (and its value, if any)
+   from ARGV and adjusting *ARGC.  Processing stops at "--" (which is
+   removed) or at the first unrecognized argument.  PGM is the
+   program name used in diagnostics.
+
+   Recognized switches:
+     -fupc-threads=N, -fupc-threads-N, -n N, -nN   number of threads
+     -fupc-pthreads-N (pthreads builds only)       number of pthreads
+     -fupc-heap=N, -fupc-heap-N, -heap N, -heapN   per-thread heap size
+     -sched-policy P, -mem-policy P, -sched-cpu-avoid LIST
+     -g                                            UPC debugging
+
+   On return THREADS is set (and UPC_PTHREADS, for pthreads builds);
+   the heap size, scheduling policy, memory policy, CPU avoid set
+   and debug flag are updated as requested.  Any invalid switch
+   value prints a message and the usage text, and exits.  */
+static void
+__upc_process_switches (char *pgm, int *argc, char *argv[])
+{
+  long int threads_switch_value = 0;
+#ifdef GUPCR_USE_PTHREADS
+  long int pthreads_switch_value = 0;
+#endif /* GUPCR_USE_PTHREADS */
+  long int heap_switch_value = 0;
+  const char *gum_debug_env = getenv (GUM_DEBUG_ENV);
+  /* Check if GUM debugging is enabled by environment variable.  */
+  if (gum_debug_env && atoi(gum_debug_env))
+    {
+      __upc_gum_debug = 1;
+    }
+  while (*argc >= 2)
+    {
+      const char *arg = argv[1];
+      const char *tval;
+      if (!strcmp (arg, "--"))
+	{
+	  /* -- terminates any upc switches */
+	  __upc_shift_args (argc, argv);
+	  break;		/* exit loop */
+	}
+      /* Make sure that argv[2] exists before any branch below
+         reads it.  */
+      if (SWITCH_TAKES_ARG (arg) && (*argc < 3))
+	{
+	  fprintf (stderr, "%s argument requires a value\n", arg);
+	  __upc_print_help_and_exit (pgm);
+	}
+      if (!strncmp (arg, "-fupc-threads=", 14)
+          || !strncmp (arg, "-fupc-threads-", 14))
+	{
+	  tval = arg + 14;
+	  if (!__upc_get_int_value (tval, &threads_switch_value, 0,
+				    1L, (long int) GUPCR_THREADS_MAX))
+	    {
+	      fprintf (stderr, "Invalid THREADS value\n");
+	      __upc_print_help_and_exit (pgm);
+	    }
+	}
+      else if (!strcmp (arg, "-n"))
+	{
+	  tval = argv[2];
+	  if (!__upc_get_int_value (tval, &threads_switch_value, 0,
+				    1L, (long int) GUPCR_THREADS_MAX))
+	    {
+	      fprintf (stderr, "Invalid THREADS value\n");
+	      __upc_print_help_and_exit (pgm);
+	    }
+	  /* Drop the switch; its value is dropped at the bottom of
+	     the loop.  */
+	  __upc_shift_args (argc, argv);
+	}
+      else if (!strncmp (arg, "-n", 2))
+	{
+	  tval = arg + 2;
+	  if (!__upc_get_int_value (tval, &threads_switch_value, 0,
+				    1L, (long int) GUPCR_THREADS_MAX))
+	    {
+	      fprintf (stderr, "Invalid THREADS value\n");
+	      __upc_print_help_and_exit (pgm);
+	    }
+	}
+#ifdef GUPCR_USE_PTHREADS
+      else if (!strncmp (arg, "-fupc-pthreads-", 15))
+	{
+	  tval = arg + 15;
+	  if (!__upc_get_int_value (tval, &pthreads_switch_value, 0,
+				    1L, (long int) GUPCR_THREADS_MAX))
+	    {
+	      fprintf (stderr, "Invalid UPC pthreads value\n");
+	      __upc_print_help_and_exit (pgm);
+	    }
+	}
+#endif /* GUPCR_USE_PTHREADS */
+      else if (!strncmp (arg, "-fupc-heap=", 11)
+               || !strncmp (arg, "-fupc-heap-", 11))
+	{
+	  tval = arg + 11;
+	  if (!__upc_get_int_value (tval, &heap_switch_value, 1,
+				    1L, GUPCR_MAX_HEAP_SIZE))
+	    {
+	      fprintf (stderr, "Invalid heap size value\n");
+	      __upc_print_help_and_exit (pgm);
+	    }
+	}
+      else if (!strcmp (arg, "-heap"))
+	{
+	  tval = argv[2];
+	  if (!__upc_get_int_value (tval, &heap_switch_value, 1,
+				    1L, GUPCR_MAX_HEAP_SIZE))
+	    {
+	      fprintf (stderr, "Invalid heap size value\n");
+	      __upc_print_help_and_exit (pgm);
+	    }
+	  __upc_shift_args (argc, argv);
+	}
+      else if (!strncmp (arg, "-heap", 5))
+	{
+	  /* The value is attached to the switch ("-heapN"), so only
+	     one argument is consumed: it is dropped at the bottom of
+	     the loop, as for the "-nN" form.  An extra shift here
+	     would discard the argument that follows.  */
+	  tval = arg + 5;
+	  if (!__upc_get_int_value (tval, &heap_switch_value, 1,
+				    1L, GUPCR_MAX_HEAP_SIZE))
+	    {
+	      fprintf (stderr, "Invalid heap size value\n");
+	      __upc_print_help_and_exit (pgm);
+	    }
+	}
+      else if (!strcmp (arg, "-sched-policy"))
+	{
+	  if (!__upc_affinity_supported ())
+	    {
+	      fprintf (stderr,
+		       "Scheduling affinity not supported or configured\n");
+	      __upc_print_help_and_exit (pgm);
+	    }
+	  tval = argv[2];
+	  if (!strcmp (tval, "node"))
+	    {
+	      if (!__upc_numa_supported ())
+		{
+		  fprintf (stderr,
+			   "NUMA node affinity not supported or configured\n");
+		  __upc_print_help_and_exit (pgm);
+		}
+	      __upc_sched_policy = GUPCR_SCHED_POLICY_NODE;
+	    }
+	  else if (!strcmp (tval, "strict"))
+	    {
+	      __upc_sched_policy = GUPCR_SCHED_POLICY_CPU_STRICT;
+	    }
+	  else if (!strcmp (tval, "cpu"))
+	    {
+	      __upc_sched_policy = GUPCR_SCHED_POLICY_CPU;
+	    }
+	  else if (!strcmp (tval, "auto"))
+	    {
+	      __upc_sched_policy = GUPCR_SCHED_POLICY_AUTO;
+	    }
+	  else
+	    {
+	      fprintf (stderr, "Invalid scheduling policy specified\n");
+	      __upc_print_help_and_exit (pgm);
+	    }
+	  __upc_shift_args (argc, argv);
+	}
+      else if (!strcmp (arg, "-mem-policy"))
+	{
+	  if (!__upc_numa_supported ())
+	    {
+	      fprintf (stderr,
+		       "NUMA node affinity not supported or configured\n");
+	      __upc_print_help_and_exit (pgm);
+	    }
+	  tval = argv[2];
+	  if (!strcmp (tval, "node"))
+	    {
+	      __upc_mem_policy = GUPCR_MEM_POLICY_NODE;
+	    }
+	  else if (!strcmp (tval, "strict"))
+	    {
+	      __upc_mem_policy = GUPCR_MEM_POLICY_STRICT;
+	    }
+	  else if (!strcmp (tval, "auto"))
+	    {
+	      __upc_mem_policy = GUPCR_MEM_POLICY_AUTO;
+	    }
+	  else
+	    {
+	      fprintf (stderr,
+		       "Invalid memory allocation policy specified\n");
+	      __upc_print_help_and_exit (pgm);
+	    }
+	  __upc_shift_args (argc, argv);
+	}
+      else if (!strcmp (arg, "-sched-cpu-avoid"))
+	{
+	  if (!__upc_affinity_supported ())
+	    {
+	      fprintf (stderr,
+		       "Scheduling affinity not supported or configured\n");
+	      __upc_print_help_and_exit (pgm);
+	    }
+	  tval = argv[2];
+	  if (!__upc_get_cpu_avoid_values (tval, __upc_cpu_avoid_set))
+	    {
+	      fprintf (stderr, "Invalid CPU to avoid string\n");
+	      __upc_print_help_and_exit (pgm);
+	    }
+	  __upc_shift_args (argc, argv);
+	}
+      else if (!strcmp (arg, "-g"))
+	{
+	  __upc_gum_debug = 1;
+	}
+      else
+	/* exit loop at first unrecognized switch.  */
+	break;
+      /* Drop the argument just processed (for switches with a
+         separate value, this drops the value).  */
+      __upc_shift_args (argc, argv);
+    }
+  if (heap_switch_value)
+    {
+      __upc_init_heap_size = heap_switch_value;
+    }
+  if (threads_switch_value)
+    {
+      if (THREADS > 0)
+	{
+	  /* THREADS was already given at compile-time; the switch
+	     must agree with it.  */
+	  if (threads_switch_value != THREADS)
+	    {
+	      fprintf (stderr, "%s: UPC error: The value of the"
+		       " -fupc-threads=N switch: %ld,"
+		       " does not agree with the value given at"
+		       " compile-time: %d\n",
+		       pgm, threads_switch_value, THREADS);
+	      exit (2);
+	    }
+	}
+      else
+	{
+	  THREADS = threads_switch_value;
+#ifdef GUPCR_USE_PTHREADS
+	  if (UPC_PTHREADS == -1)
+	    {
+	      /* UPC threads per process specified as 'dynamic'.
+	         Set it GUPCR_THREADS to specified value of THREADS. */
+	      UPC_PTHREADS = THREADS;
+	    }
+#endif /* GUPCR_USE_PTHREADS */
+	}
+    }
+  if (THREADS <= 0)
+    {
+      fprintf (stderr, "%s: UPC error: No value given for THREADS\n", pgm);
+      __upc_print_help_and_exit (pgm);
+    }
+#ifdef GUPCR_USE_PTHREADS
+  if (pthreads_switch_value)
+    {
+      if (pthreads_switch_value != THREADS)
+	{
+	  fprintf (stderr,
+		   "%s: UPC error: pthreads value must equal the number of UPC threads\n",
+		   pgm);
+	  __upc_print_help_and_exit (pgm);
+	}
+      else
+	{
+	  UPC_PTHREADS = pthreads_switch_value;
+	}
+    }
+#endif /* GUPCR_USE_PTHREADS */
+#if !GUPCR_HAVE_GUM_DEBUG
+  if (__upc_gum_debug)
+    {
+      fprintf (stderr, "UPC debugging not supported or configured\n");
+      __upc_print_help_and_exit (pgm);
+    }
+#endif
+}
+
+/* Create and initialize the UPC runtime information block.
+
+   Allocates the runtime heap, the upc_info_t structure, a copy of
+   the global memory file name and the global page table (GPT), and
+   records the program name PGM, the monitor process id, CPU count,
+   scheduling/memory policies, host name, and the initial per-thread
+   page allocation and heap layout.  Also creates the global shared
+   memory file.
+
+   Returns the new information block, or 0 on failure; ERR_MSG is
+   passed to the allocation/creation routines so that they can
+   describe the failure.  */
+static upc_info_p
+__upc_init (char *pgm, const char **err_msg)
+{
+  upc_info_p u;
+  os_heap_p runtime_heap;
+  size_t alloc_data_size, local_size, max_init_alloc, heap_size;
+  size_t mmap_fn_len;
+  char mmap_file_name[2046];
+  upc_page_num_t init_page_alloc;
+  const size_t gpt_size = (GUPCR_VM_MAX_PAGES_PER_THREAD * THREADS)
+    * sizeof (upc_pte_t);
+
+  /* On SGI/Irix, create the shared arena, used for inter-process
+     synchronization, otherwise probably a no-op.  The heap is sized
+     to hold everything allocated from it below.  */
+  max_init_alloc =
+    GUPCR_ROUND (sizeof (upc_info_t) + gpt_size + sizeof (mmap_file_name),
+		 0x4000);
+  runtime_heap = __upc_create_runtime_heap (max_init_alloc, err_msg);
+  if (!runtime_heap)
+    return 0;
+
+  /* allocate the UPC info structure */
+  u = __upc_runtime_alloc (sizeof (upc_info_t), &runtime_heap, err_msg);
+  if (!u)
+    return 0;
+  memset (u, '\0', sizeof (upc_info_t));
+
+  u->runtime_heap = runtime_heap;
+  u->program_name = pgm;
+  u->monitor_pid = getpid ();
+  u->num_cpus = __upc_num_cpus;
+  /* Defaults to 1, will be overridden if NUMA supported.  */
+  u->num_nodes = 1;
+  u->sched_policy = __upc_sched_policy;
+  u->mem_policy = __upc_mem_policy;
+
+  /* MPIR_partial_attach_ok support.  */
+  if (MPIR_being_debugged)
+    u->partial_attach_start = 0; /* Stop the threads until MPIR_Breakpoint.  */
+  else
+    u->partial_attach_start = 1; /* No debugging, threads can proceed.  */
+  /* Find host name for MPIR interface.  */
+  if (!gethostname (host_name, HOST_NAME_LEN))
+    {
+      /* If the name had to be truncated, gethostname is not
+         required to null-terminate the buffer.  */
+      host_name[HOST_NAME_LEN - 1] = '\0';
+      u->host_name = host_name;
+    }
+  else
+    perror ("unable to find hostname");
+
+  /* Calculate per-thread contribution to global shared memory region. */
+  alloc_data_size = GUPCR_SHARED_SECTION_END - GUPCR_SHARED_SECTION_START;
+  alloc_data_size = GUPCR_ROUND (alloc_data_size, C64K);
+  heap_size = GUPCR_ROUND (__upc_init_heap_size, C64K);
+  local_size = alloc_data_size + heap_size;
+  /* Round up to a page boundary */
+  local_size = GUPCR_ROUND (local_size, GUPCR_VM_PAGE_SIZE);
+  init_page_alloc = local_size / GUPCR_VM_PAGE_SIZE;
+  /* Everything that isn't initially allocated to data will
+     be used for the heap.  */
+  heap_size = local_size - alloc_data_size;
+  u->init_page_alloc = init_page_alloc;
+  u->init_heap_size = heap_size;
+  /* The initial heap starts right after the data area.  */
+  GUPCR_PTS_SET_NULL_SHARED (u->init_heap_base);
+  GUPCR_PTS_SET_VADDR (u->init_heap_base, alloc_data_size);
+  u->smem_fd = __upc_create_global_mem_file (mmap_file_name, err_msg);
+  if (u->smem_fd < 0)
+    return 0;
+  /* Keep a copy of the file name in the runtime heap.  */
+  mmap_fn_len = strlen (mmap_file_name);
+  u->mmap_file_name = (char *) __upc_runtime_alloc (mmap_fn_len + 1,
+						    &runtime_heap, err_msg);
+  if (!u->mmap_file_name)
+    return 0;
+  strcpy (u->mmap_file_name, mmap_file_name);
+  /* Allocate the GPT.  Avoid initializing it, because it may
+     be a rather large data structure of which only a few initial
+     locations are used.  The VM routines that manipulate the
+     GPT will initialize all needed entries as they are used. */
+  u->gpt = (upc_pte_p) __upc_runtime_alloc (gpt_size, &runtime_heap, err_msg);
+  if (!u->gpt)
+    return 0;
+  return u;
+}
+
+/* Per thread initialization.  Initializes, in this order, the VM
+   system, the locks, the heap manager (with the initial heap base
+   and size recorded in U) and the barrier support, and then calls
+   every non-null function found in the UPC initialization array.
+   The caller executes a barrier after this routine and before
+   calling the main program, which ensures that the initialization
+   completes before the main program runs.  */
+
+static void
+__upc_per_thread_init (upc_info_p u)
+{
+  typedef void (*func_ptr_t) (void);
+  extern func_ptr_t GUPCR_INIT_ARRAY_START[];
+  extern func_ptr_t GUPCR_INIT_ARRAY_END[];
+  const int count = (int)(GUPCR_INIT_ARRAY_END - GUPCR_INIT_ARRAY_START);
+  int idx = 0;
+  __upc_vm_init_per_thread ();
+  __upc_lock_init ();
+  __upc_heap_init (u->init_heap_base, u->init_heap_size);
+  __upc_barrier_init ();
+  while (idx < count)
+    {
+      func_ptr_t fn = GUPCR_INIT_ARRAY_START[idx++];
+      /* Zero words introduced by the section marker, or by the
+         linker, are not functions: skip them.  */
+      if (!fn)
+	continue;
+      (*fn) ();
+    }
+}
+
+#ifndef GUPCR_USE_PTHREADS
+
+/* Run UPC thread THREAD_ID in the current process: perform the per
+   thread initialization, synchronize with the debugger if needed,
+   wait on the runtime barrier, call the user's main program with
+   ARGC/ARGV, and finally call __upc_exit with the status returned
+   by the main program.  U is the runtime information block.  */
+static void
+__upc_run_this_thread (upc_info_p u, int argc, char *argv[],
+		       unsigned int thread_id)
+{
+  int status;
+  MYTHREAD = thread_id;
+  /* Perform per thread initialization.  */
+  __upc_per_thread_init (u);
+  if (THREADS == 1)
+    {
+      /* A single thread is handled as a special case.
+         No child process is created to run the thread. */
+      MPIR_being_debugged = 0;
+      /* Give the debugger a chance to pick up runtime info.  */
+      MPIR_Breakpoint ();
+      /* It is safe to unlink the temporary file, after the breakpoint
+         is hit.  This gives the debugger a chance to open the mmap
+         global memory file so that it can access UPC shared memory.  */
+      if (unlink (u->mmap_file_name) < 0)
+	{
+	  perror ("cannot unlink global shared memory file");
+	  abort ();
+	}
+    }
+  else if (MPIR_being_debugged)
+    {
+      /* Wait for partial attach flag.  It is set by the starter
+         process after its call to MPIR_Breakpoint.  */
+      while (!u->partial_attach_start)
+	__upc_yield_cpu ();
+    
+      /* Wait for the debugger to acquire us.  */
+      while (!MPIR_debug_gate)
+	__upc_yield_cpu ();
+    }
+#if GUPCR_HAVE_GUM_DEBUG
+  if (__upc_gum_debug)
+    {
+      __upc_gum_init (THREADS, thread_id);
+    }
+#endif
+  /* Do not enter the main program before the barrier completes.  */
+  __upc_barrier (GUPCR_RUNTIME_BARRIER_ID);
+  __upc_pupc_init (&argc, &argv);
+  status = GUPCR_MAIN (argc, argv);
+  /* Record the collective exit event (start and end) for GASP.  */
+  p_startx (GASP_UPC_COLLECTIVE_EXIT, status);
+  p_endx (GASP_UPC_COLLECTIVE_EXIT, status);
+  __upc_exit (status);
+}
+
+/* Implement UPC threads as processes.
+
+   With a single thread, the thread runs directly in the current
+   process and this function does not return.  Otherwise one child
+   process is forked per UPC thread; the parent records each child's
+   process id in U (and in the MPIR process table when a debugger is
+   present), notifies the debugger once all children have been
+   started, and unlinks the global shared memory file.  ARGC/ARGV
+   are handed on to the user's main program.  */
+static void
+__upc_run_threads (upc_info_p u, int argc, char *argv[])
+{
+  int thread_id;
+  int flag;
+
+  /* Set O_APPEND on stdout and stderr (see Berkeley UPC bug 2136).  */
+  flag = fcntl (STDOUT_FILENO, F_GETFL, 0);
+  if (flag >= 0)
+    (void) fcntl (STDOUT_FILENO, F_SETFL, flag | O_APPEND);
+  flag = fcntl (STDERR_FILENO, F_GETFL, 0);
+  if (flag >= 0)
+    (void) fcntl (STDERR_FILENO, F_SETFL, flag | O_APPEND);
+
+  if (THREADS == 1)
+    {
+      __upc_affinity_set (u, 0);
+      __upc_run_this_thread (u, argc, argv, 0);
+      /* Shouldn't get here.  */
+      abort ();
+    }
+
+  /* In case a debugger is using the value;
+     we don't want it to see two thread zeros */
+  MYTHREAD = -1;
+  /* Allocate space to tell the debugger about
+     the process we're creating */
+  MPIR_proctable = malloc (THREADS * sizeof (*MPIR_proctable));
+  if (!MPIR_proctable)
+    {
+      /* The table is written below when a debugger is present;
+         do not continue without it.  */
+      perror ("malloc");
+      exit (2);
+    }
+  /* Tell the debugger this process is a starter process.  */
+  MPIR_i_am_starter ();
+  for (thread_id = 0; thread_id < THREADS; ++thread_id)
+    {
+      pid_t pid = fork ();
+      if (pid == 0)
+	{
+	  /* child */
+	  __upc_affinity_set (u, thread_id);
+	  __upc_run_this_thread (u, argc, argv, thread_id);
+	  /* Shouldn't get here.  A child must never fall back into
+	     the fork loop.  */
+	  abort ();
+	}
+      else if (pid > 0)
+	{
+	  /* parent */
+	  u->thread_info[thread_id].pid = pid;
+	  if (MPIR_being_debugged)
+	    {
+	      MPIR_proctable[thread_id].host_name = u->host_name;
+	      MPIR_proctable[thread_id].executable_name = u->program_name;
+	      MPIR_proctable[thread_id].pid = pid;
+	    }
+	}
+      else
+	{
+	  /* error */
+	  perror ("fork");
+	  exit (2);
+	}
+    }
+  /* We're the main process, there are child processes and they're all started.
+   * Let the debugger know about that.
+   */
+  if (MPIR_being_debugged)
+    {
+      MPIR_proctable_size = THREADS;
+      MPIR_debug_state = MPIR_DEBUG_SPAWNED;
+      /* The debugger will have set a breakpoint there... */
+      MPIR_Breakpoint ();
+      /* Release threads.  */
+      u->partial_attach_start = 1;
+    }
+  if (unlink (u->mmap_file_name) < 0)
+    {
+      perror ("cannot unlink global shared memory file");
+      abort ();
+    }
+}
+
+/* Return the number of the UPC thread whose recorded process id is
+   PID.  The thread table is searched from the highest thread number
+   downwards; a negative value is returned if PID is not found.  */
+static int
+__upc_get_thread_id (pid_t pid)
+{
+  upc_info_p info = __upc_info;
+  int id = THREADS;
+  while (--id >= 0)
+    {
+      if (info->thread_info[id].pid == pid)
+	break;
+    }
+  return id;
+}
+
+/* Terminate program.
+   Handler for signal SIG, installed for SIGTERM by the monitor.
+   It restores the default disposition of the signal, so that the
+   monitor is killed as part of the group too, and then sends the
+   signal to every process in the current process group.  */
+static void
+__upc_sigterm_handler (int sig)
+{
+  struct sigaction dfl_action;
+  /* Back to the default action for this signal.  */
+  dfl_action.sa_flags = 0;
+  sigemptyset (&dfl_action.sa_mask);
+  dfl_action.sa_handler = SIG_DFL;
+  sigaction (sig, &dfl_action, NULL);
+  /* Kill the whole group.  */
+  if (killpg (getpgrp (), sig) != -1)
+    return;
+  perror ("killpg");
+  abort ();
+}
+
+/* Monitor the child processes that run the UPC threads.
+
+   Installs the SIGTERM handler and then polls with waitpid until
+   there are no more children.  While no child has changed state it
+   calls MPIR_Breakpoint and sleeps for 100mS.  A child exit status
+   with bit 0x80 set is taken as a call to upc_global_exit: all the
+   other threads are killed with SIGKILL.  A child terminated by any
+   other signal causes SIGTERM to be sent to the process group.
+
+   Returns the most recently recorded child exit status (with bit
+   0x80 cleared for upc_global_exit), or -1 if none was recorded.  */
+static int
+__upc_monitor_threads (void)
+{
+  upc_info_p u = __upc_info;
+  pid_t pid;
+  int wait_status;
+  int exit_status;
+  int thread_id;
+  int global_exit_invoked;
+  struct sigaction action;
+  exit_status = -1;
+  global_exit_invoked = 0;
+  /* Install SIGTERM handler responsible for
+     terminating the whole program.  */
+  action.sa_handler = __upc_sigterm_handler;
+  sigemptyset (&action.sa_mask);
+  action.sa_flags = 0;
+  sigaction (SIGTERM, &action, NULL);
+  /* Wait for threads to finish.  */
+  for (;;)
+    {
+      /* Non-blocking check for any child that changed state.  */
+      pid = waitpid (-1, &wait_status, WNOHANG);
+      /* Check for errors.  */
+      if (pid == -1)
+	{
+	  /* Continue checking if interrupted
+	     (handling other signals).  */
+	  if (errno == EINTR)
+	    continue;
+	  /* Stop waiting if no more children.  */
+	  if (errno == ECHILD)
+	    break;
+	  /* Abort if invalid argument.  */
+	  if (errno == EINVAL)
+	    {
+	      perror ("waitpid");
+	      abort ();
+	    }
+	}
+      /* Not a child exit?  */
+      if (pid == 0)
+	{
+	  /* Check for debugger attach.  */
+	  MPIR_Breakpoint ();
+	  /* Release the CPU for 100mS and continue checking.  */
+	  usleep (100000);
+	  continue;
+	}
+      /* Check for child process that exited.  */
+      thread_id = __upc_get_thread_id (pid);
+      if (!global_exit_invoked && WIFEXITED (wait_status))
+	{
+	  int child_exit = WEXITSTATUS (wait_status);
+	  if (child_exit & 0x80)
+	    {
+	      /* By convention, the result of a call to upc_global_exit
+	         has the high bit in the byte set.
+	         Terminate all the other threads in the program. */
+	      int t;
+	      for (t = 0; t < THREADS; ++t)
+		{
+		  /* Note: this 'pid' shadows the outer variable.  */
+		  int pid = u->thread_info[t].pid;
+		  if (pid <= 0)
+		    abort ();
+		  if (t != thread_id)
+		    (void) kill (pid, SIGKILL);
+		}
+	      /* Strip the marker bit to get the user's status.  */
+	      child_exit &= 0x7f;
+	      global_exit_invoked = 1;
+	    }
+	  else if ((exit_status != -1) && exit_status != child_exit)
+	    {
+	      fprintf (stderr, "conflicting exit status (%d) for"
+		       " thread %d\n", child_exit, thread_id);
+	    }
+	  exit_status = child_exit;
+	}
+      else if (WIFSIGNALED (wait_status))
+	{
+	  int child_sig = WTERMSIG (wait_status);
+	  /* Ignore SIGKILL signals.
+	     We use them to implement upc_global_exit(). */
+	  if (child_sig == SIGKILL && global_exit_invoked)
+	    continue;
+	  fprintf (stderr, "thread %d terminated with signal: '%s'\n",
+		   thread_id, __upc_strsignal (child_sig));
+          /*  GASP note: We can't record a noncollective GASP
+              exit event here, because the process has already died.  */
+	  /* We'll all go away now. */
+	  if (killpg (getpid (), SIGTERM) == -1)
+	    {
+	      perror ("killpg");
+	      exit (-1);
+	    }
+	}
+    }
+  return exit_status;
+}
+
+/* Calls to exit() are rewritten into calls to __upc_exit()
+   by #define in <gcc-upc.h>.  Simply perform a upc_barrier and
+   then exit the process.  __upc_monitor_threads() will pick up
+   the returned status code.  */
+void
+__upc_exit (int status)
+{
+  upc_info_p info = __upc_info;
+  if (info == NULL)
+    __upc_fatal ("UPC runtime not initialized");
+  /* Push out all buffered stdio output, and sync stdout and stderr,
+     while holding the runtime lock.  */
+  __upc_acquire_lock (&info->lock);
+  fflush (NULL);
+  fsync (STDOUT_FILENO);
+  fsync (STDERR_FILENO);
+  __upc_release_lock (&info->lock);
+  /* Meet the other threads at the runtime barrier, then leave
+     with the caller's status.  */
+  __upc_barrier (GUPCR_RUNTIME_BARRIER_ID);
+  exit (status);
+}
+
+/* upc_global_exit - exit program with given status, terminate
+   all other threads.
+ 
+   The implementation imposes a restriction on exit return codes.
+   If the return code has bit 7 (0x80) set, then the exit code will
+   be interpreted as the code passed to upc_global_exit() and the
+   monitor program will cancel all other executing threads.  */
+void
+upc_global_exit (int status)
+{
+  int exit_code = status;
+  if (!__upc_info)
+    __upc_fatal ("UPC runtime not initialized");
+  /* With more than one thread, flag the code with bit 7 so that the
+     monitor recognizes a global exit; only the low seven bits of
+     STATUS survive in that case.  */
+  if (THREADS > 1)
+    exit_code = (status & 0x7f) | 0x80;
+  exit (exit_code);
+}
+
+#else /* !USE_UPC_PTHREADS */
+
+/* Implement UPC threads as POSIX threads. */
+
+/* UPC rand() pthreads implementation uses per thread seed */
+
+static GUPCR_THREAD_LOCAL unsigned int __upc_rand_seed;
+
+/* Return the next value produced by rand_r() from the
+   __upc_rand_seed of the calling thread.  */
+int
+__upc_rand (void)
+{
+  return rand_r (&__upc_rand_seed);
+}
+
+/* Set the __upc_rand_seed used by __upc_rand() to _SEED.  */
+void
+__upc_srand (unsigned int _seed)
+{
+  __upc_rand_seed = _seed;
+}
+
+/* Argument block handed to __upc_start_pthread for one UPC thread.  */
+typedef struct upc_startup_args_struct
+{
+  int thread_id;	/* Value assigned to MYTHREAD in the new thread.  */
+  int argc;		/* Argument count passed on to GUPCR_MAIN.  */
+  char **argv;		/* Argument vector passed on to GUPCR_MAIN.  */
+} upc_startup_args_t;
+typedef upc_startup_args_t *upc_startup_args_p;
+
+/* Entry point of each pthread that implements a UPC thread.
+
+   ARG is the upc_startup_args_t block prepared by the creator.
+   The thread sets up its per-thread state, waits at the runtime
+   barrier, runs GUPCR_MAIN, and returns a pointer to the slot in
+   the runtime information block that holds its exit status.  */
+static void *
+__upc_start_pthread (void *arg)
+{
+  upc_startup_args_p args = arg;
+  const int self = args->thread_id;
+  upc_info_p info = __upc_info;
+  int *exit_slot;
+  if (info == NULL)
+    __upc_fatal ("UPC runtime not initialized");
+  /* MYTHREAD lives in thread local storage.  */
+  MYTHREAD = self;
+  __upc_affinity_set (info, self);
+  /* Per thread initialization.  */
+  __upc_per_thread_init (info);
+  /* Seed the random number generator.
+     Note: C99 requires an initial seed value of 1, per 7.20.2.2.  */
+  __upc_srand (1);
+  exit_slot = &info->thread_info[self].exit_status;
+  __upc_barrier (GUPCR_RUNTIME_BARRIER_ID);
+  __upc_pupc_init (&args->argc, &args->argv);
+  /* Run the user's program and record its result.  */
+  *exit_slot = GUPCR_MAIN (args->argc, args->argv);
+  p_startx (GASP_UPC_COLLECTIVE_EXIT, *exit_slot);
+  p_endx (GASP_UPC_COLLECTIVE_EXIT, *exit_slot);
+  return exit_slot;
+}
+
+/* Create one POSIX thread per UPC thread, each running
+   __upc_start_pthread with its own heap-allocated argument block,
+   then unlink the global shared memory file.
+
+   U is the runtime information block; ARGC and ARGV are the program
+   arguments handed to every thread.  Every failure is fatal: a
+   diagnostic is printed and the process aborts.  */
+static void
+__upc_run_threads (upc_info_p u, int argc, char *argv[])
+{
+  int thread_id;
+  int status;
+  pthread_attr_t thread_attr;
+  size_t stack_size;
+
+  if (THREADS != UPC_PTHREADS)
+    {
+      fprintf (stderr,
+	       "GUPC pthreads implementation requires that PTHREADS be equal to THREADS.\n");
+      abort ();
+    }
+
+  /* The pthread_* functions report failure through their return
+     value and do not set errno, so the returned error number is
+     stored in errno before perror is called.  */
+  status = pthread_attr_init (&thread_attr);
+  if (status)
+    {
+      errno = status;
+      perror ("pthread_attr_init");
+      abort ();
+    }
+  status = pthread_attr_getstacksize (&thread_attr, &stack_size);
+  if (status)
+    {
+      errno = status;
+      perror ("pthread_attr_getstacksize");
+      abort ();
+    }
+  /* Add the GUPC's default per-thread stack size to the
+     operating system default.  The OS default will often
+     include enough space to account for TLS variables declared
+     using the __thread qualifier.  */
+  stack_size += GUPCR_DEFAULT_PER_THREAD_STACK_SIZE;
+  status = pthread_attr_setstacksize (&thread_attr, stack_size);
+  if (status)
+    {
+      errno = status;
+      perror ("pthread_attr_setstacksize");
+      abort ();
+    }
+
+  /* Technically, we should probably make a thread-local
+     copy of the arg vector.  For now, just pass the address.  */
+
+  for (thread_id = 0; thread_id < THREADS; ++thread_id)
+    {
+      upc_startup_args_p startup_args;
+      pthread_t pthread_id;
+      startup_args = malloc (sizeof (*startup_args));
+      if (!startup_args)
+	{
+	  perror ("malloc");
+	  abort ();
+	}
+      startup_args->argc = argc;
+      startup_args->argv = argv;
+      startup_args->thread_id = thread_id;
+      status = pthread_create (&pthread_id, &thread_attr,
+			       __upc_start_pthread, startup_args);
+      if (status)
+	{
+	  errno = status;
+	  perror ("pthread_create");
+	  abort ();
+	}
+      u->thread_info[thread_id].os_thread = pthread_id;
+    }
+  /* The attribute object is no longer needed once all of the
+     threads have been created.  */
+  (void) pthread_attr_destroy (&thread_attr);
+  if (unlink (u->mmap_file_name) < 0)
+    {
+      perror ("cannot unlink global shared memory file");
+      abort ();
+    }
+}
+
+/* Wait for all pthreads to exit.  This implementation requires
+   that there is one pthread per UPC thread.
+
+   Each thread is joined in thread number order; the value it
+   returned is taken to be a pointer to its int exit status.  A
+   diagnostic is printed when a thread's status differs from the one
+   recorded before it.  Returns the exit status of the last thread
+   joined, or -1 if THREADS is not positive.  */
+static int
+__upc_monitor_threads (void)
+{
+  int exit_status = -1;
+  upc_info_p u = __upc_info;
+  int t;
+  for (t = 0; t < THREADS; ++t)
+    {
+      pthread_t os_thread = u->thread_info[t].os_thread;
+      void *exit_p;
+      int child_exit = 0;
+      int status;
+      status = pthread_join (os_thread, &exit_p);
+      if (status)
+	{
+	  /* pthread_join returns the error number and does not set
+	     errno; store it there so that perror reports it.  */
+	  errno = status;
+	  perror ("pthread_join");
+	  abort ();
+	}
+      child_exit = *((int *) exit_p);
+      if ((exit_status != -1) && exit_status != child_exit)
+	{
+	  fprintf (stderr, "conflicting exit status (%d) for"
+		   " thread %d\n", child_exit, t);
+	}
+      exit_status = child_exit;
+    }
+  return exit_status;
+}
+
+/* Terminate the calling UPC thread (pthread) with exit code STATUS.
+   The thread first waits at the runtime barrier, then stores STATUS
+   in its exit status slot and hands the slot's address to
+   pthread_exit.  */
+void
+__upc_exit (int status)
+{
+  const int self = MYTHREAD;
+  upc_info_p info = __upc_info;
+  if (info == NULL)
+    __upc_fatal ("UPC runtime not initialized");
+  __upc_barrier (GUPCR_RUNTIME_BARRIER_ID);
+  info->thread_info[self].exit_status = status;
+  pthread_exit (&info->thread_info[self].exit_status);
+}
+
+/* upc_global_exit - exit program with given status, terminate
+   all other threads.
+ 
+   In the pthreads implementation inside a single process,
+   upc_global_exit equates to exit(): STATUS is passed to exit()
+   unchanged.  */
+void
+upc_global_exit (int status)
+{
+  exit (status);
+}
+
+#endif /* !GUPCR_USE_PTHREADS */
+
+/* Publish an abort through the MPIR debugger interface variables:
+   store MESG in MPIR_debug_abort_string, set MPIR_debug_state to
+   MPIR_DEBUG_ABORTING, and call MPIR_Breakpoint().  MESG is stored
+   by pointer, not copied.  */
+static void
+__upc_notify_debugger_of_abort (const char *mesg)
+{
+  MPIR_debug_abort_string = mesg;
+  MPIR_debug_state = MPIR_DEBUG_ABORTING;
+  MPIR_Breakpoint ();
+}
+
+/* Helper for __upc_fatal: return the new message length after an
+   snprintf-style call that was given LIMIT - LEN bytes of room and
+   returned N.  A negative N (formatting error) appends nothing; an N
+   that did not fit means the text was truncated at LIMIT - 1.  */
+static size_t
+__upc_fatal_advance (size_t len, size_t limit, int n)
+{
+  if (n < 0)
+    return len;
+  if ((size_t) n >= limit - len)
+    return limit - 1;
+  return len + (size_t) n;
+}
+
+/* Issue a fatal UPC runtime error.
+
+   FMT and the arguments that follow form a printf-style message.
+   The text written to stderr has the form
+     "<program>: [at <file>:<line> ]UPC error: <message>\n"
+   and is truncated if it does not fit the internal buffer.  The same
+   text is passed to __upc_notify_debugger_of_abort, and the process
+   is then aborted; this function does not return.
+
+   Note: this is called by a UPC thread (process) when a fatal runtime
+   error is detected.  The runtime lock, when the runtime has been
+   initialized, is acquired and is not released before the abort.  */
+void
+__upc_fatal (const char *fmt, ...)
+{
+  upc_info_p u = __upc_info;
+  char msg[4096];
+  /* Formatting may use at most LIMIT bytes (terminator included),
+     which always leaves room for the trailing newline.  */
+  const size_t limit = sizeof (msg) - 1;
+  size_t len = 0;
+  int n;
+  va_list ap;
+  if (u)
+    __upc_acquire_lock (&u->lock);
+  n = snprintf (msg, limit, "%s: ", __upc_pgm_name);
+  len = __upc_fatal_advance (len, limit, n);
+  if (__upc_err_filename && __upc_err_linenum)
+    {
+      n = snprintf (msg + len, limit - len, "at %s:%u ",
+		    __upc_err_filename, __upc_err_linenum);
+      len = __upc_fatal_advance (len, limit, n);
+    }
+  n = snprintf (msg + len, limit - len, "UPC error: ");
+  len = __upc_fatal_advance (len, limit, n);
+  va_start (ap, fmt);
+  n = vsnprintf (msg + len, limit - len, fmt, ap);
+  va_end (ap);
+  len = __upc_fatal_advance (len, limit, n);
+  /* Append the newline and terminate the string explicitly; the
+     newline replaces the terminator written by the formatter.  */
+  msg[len++] = '\n';
+  msg[len] = '\0';
+  fflush (0);
+  (void) fputs (msg, stderr);
+  fflush (0);
+  __upc_notify_debugger_of_abort (msg);
+#if HAVE_UPC_BACKTRACE
+  __upc_backtrace_restore_handlers ();
+  __upc_fatal_backtrace ();
+#endif
+  abort ();
+}
+
+/* Report that runtime initialization failed for the given REASON:
+   print the diagnostic, notify the debugger interface, and abort.  */
+static void
+__upc_report_init_failure (const char *reason)
+{
+  fprintf (stderr, "%s: UPC initialization failed.\n"
+	   "%s: reason: %s\n", __upc_pgm_name, __upc_pgm_name, reason);
+  __upc_notify_debugger_of_abort (reason);
+  abort ();
+}
+
+/* UPC runtime start up.
+
+   Initializes the runtime from ARGC/ARGV, starts the UPC threads,
+   waits for them through __upc_monitor_threads, and exits the
+   process with the status it reports.  */
+int
+GUPCR_START (int argc, char *argv[])
+{
+  const char *reason = 0;
+  int exit_code;
+  upc_info_p info;
+  __upc_sys_init ();
+  __upc_pgm_name = argv[0];
+  __upc_validate_pgm_info (__upc_pgm_name);
+  __upc_cpu_avoid_set = __upc_affinity_cpu_avoid_new ();
+  __upc_process_switches (__upc_pgm_name, &argc, argv);
+  info = __upc_init (__upc_pgm_name, &reason);
+  if (!info)
+    __upc_report_init_failure (reason);
+  __upc_info = info;
+
+#if HAVE_UPC_BACKTRACE
+  /* Set up backtrace support.  */
+  __upc_backtrace_init (__upc_pgm_name);
+#endif
+  /* The runtime spin lock is initialized only after __upc_info has
+     been allocated and initialized, because __upc_init_lock refers
+     to __upc_info on some platforms (eg, SGI/Irix).  */
+  __upc_init_lock (&info->lock);
+  /* Bring up the VM system.  */
+  __upc_vm_init (info->init_page_alloc);
+  /* Bring up thread affinity.  */
+  if (!__upc_affinity_init (info, __upc_cpu_avoid_set, &reason))
+    __upc_report_init_failure (reason);
+  __upc_affinity_cpu_avoid_free (__upc_cpu_avoid_set);
+  /* The upc_forall depth count must start at 0.  */
+  __upc_forall_depth = 0;
+  /* Run the program, then exit with the status of its threads.  */
+  __upc_run_threads (info, argc, argv);
+  exit_code = __upc_monitor_threads ();
+  exit (exit_code);
+}
Index: libgupc/smp/upc_mem.c
===================================================================
--- libgupc/smp/upc_mem.c	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_mem.c	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,56 @@
+/* Copyright (C) 2001-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime Library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#include "upc_config.h"
+#include "upc_sysdep.h"
+#include "upc_defs.h"
+#include "upc_sup.h"
+#include "upc_access.h"
+#include "upc_mem.h"
+
+/* Copy N bytes from shared SRC to shared DEST.
+   External entry point that forwards to the inline __upc_memcpy.  */
+void
+upc_memcpy (upc_shared_ptr_t dest, upc_shared_ptr_t src, size_t n)
+{
+  __upc_memcpy (dest, src, n);
+}
+
+/* Copy N bytes from shared SRC to the local buffer DEST.
+   External entry point that forwards to the inline __upc_memget.  */
+void
+upc_memget (void *dest, upc_shared_ptr_t src, size_t n)
+{
+  __upc_memget (dest, src, n);
+}
+
+/* Copy N bytes from the local buffer SRC to shared DEST.
+   External entry point that forwards to the inline __upc_memput.  */
+void
+upc_memput (upc_shared_ptr_t dest, const void *src, size_t n)
+{
+  __upc_memput (dest, src, n);
+}
+
+/* Set N bytes of shared memory starting at DEST to the value C.
+   External entry point that forwards to the inline __upc_memset.  */
+void
+upc_memset (upc_shared_ptr_t dest, int c, size_t n)
+{
+  __upc_memset (dest, c, n);
+}
Index: libgupc/smp/upc_mem.h
===================================================================
--- libgupc/smp/upc_mem.h	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_mem.h	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,136 @@
+/* Copyright (C) 2006-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+
+#ifndef _UPC_MEM_H_
+#define _UPC_MEM_H_
+
+/* The following memory-to-memory operations have been
+   factored into this file because they are needed both
+   by upc_access.c and upc_mem.c  */
+
+//begin lib_inline_mem_sup
+
+/* Copy N bytes from shared SRC to shared DEST.
+
+   Both operands are translated to local addresses with
+   __upc_sptr_to_addr, and the copy is carried out in pieces so that
+   no single memcpy crosses a GUPCR_VM_PAGE_SIZE boundary of either
+   operand.  A null SRC or DEST is a fatal error.  */
+__attribute__((__always_inline__))
+static inline
+void
+__upc_memcpy (upc_shared_ptr_t dest, upc_shared_ptr_t src, size_t n)
+{
+  if (GUPCR_PTS_IS_NULL (src))
+    __upc_fatal ("Invalid access via null shared pointer");
+  if (GUPCR_PTS_IS_NULL (dest))
+    __upc_fatal ("Invalid access via null shared pointer");
+  do
+    {
+      /* Source side: local address and bytes left in its page.  */
+      char *const from = (char *) __upc_sptr_to_addr (src);
+      const size_t from_offset = GUPCR_PTS_OFFSET (src);
+      const size_t from_in_page = from_offset & GUPCR_VM_OFFSET_MASK;
+      const size_t from_room = GUPCR_VM_PAGE_SIZE - from_in_page;
+      /* Destination side: likewise.  */
+      char *const to = (char *) __upc_sptr_to_addr (dest);
+      const size_t to_offset = GUPCR_PTS_OFFSET (dest);
+      const size_t to_in_page = to_offset & GUPCR_VM_OFFSET_MASK;
+      const size_t to_room = GUPCR_VM_PAGE_SIZE - to_in_page;
+      const size_t chunk = GUPCR_MIN (GUPCR_MIN (from_room, to_room), n);
+      memcpy (to, from, chunk);
+      n -= chunk;
+      /* Advance only when there is more to copy.  */
+      if (n)
+        {
+          GUPCR_PTS_INCR_VADDR (src, chunk);
+          GUPCR_PTS_INCR_VADDR (dest, chunk);
+        }
+    }
+  while (n);
+}
+
+/* Copy N bytes from shared SRC to the local buffer DEST.
+
+   SRC is translated with __upc_sptr_to_addr, and the copy is carried
+   out in pieces so that no single memcpy crosses a
+   GUPCR_VM_PAGE_SIZE boundary of the source.  A null DEST or SRC is
+   a fatal error.  */
+__attribute__((__always_inline__))
+static inline
+void
+__upc_memget (void *dest, upc_shared_ptr_t src, size_t n)
+{
+  if (!dest)
+    __upc_fatal ("Invalid access via null shared pointer");
+  if (GUPCR_PTS_IS_NULL (src))
+    __upc_fatal ("Invalid access via null shared pointer");
+  do
+    {
+      char *const from = (char *) __upc_sptr_to_addr (src);
+      const size_t from_offset = GUPCR_PTS_OFFSET (src);
+      const size_t in_page = from_offset & GUPCR_VM_OFFSET_MASK;
+      const size_t chunk = GUPCR_MIN (GUPCR_VM_PAGE_SIZE - in_page, n);
+      memcpy (dest, from, chunk);
+      n -= chunk;
+      /* Advance only when there is more to copy.  */
+      if (n)
+        {
+          GUPCR_PTS_INCR_VADDR (src, chunk);
+          dest = (char *) dest + chunk;
+        }
+    }
+  while (n);
+}
+
+/* Copy N bytes from the local buffer SRC to shared DEST.
+
+   DEST is translated with __upc_sptr_to_addr, and the copy is
+   carried out in pieces so that no single memcpy crosses a
+   GUPCR_VM_PAGE_SIZE boundary of the destination.  A null SRC or
+   DEST is a fatal error.  */
+__attribute__((__always_inline__))
+static inline
+void
+__upc_memput (upc_shared_ptr_t dest, const void *src, size_t n)
+{
+  if (!src)
+    __upc_fatal ("Invalid access via null shared pointer");
+  if (GUPCR_PTS_IS_NULL (dest))
+    __upc_fatal ("Invalid access via null shared pointer");
+  do
+    {
+      char *const to = (char *) __upc_sptr_to_addr (dest);
+      const size_t to_offset = GUPCR_PTS_OFFSET (dest);
+      const size_t in_page = to_offset & GUPCR_VM_OFFSET_MASK;
+      const size_t chunk = GUPCR_MIN (GUPCR_VM_PAGE_SIZE - in_page, n);
+      memcpy (to, src, chunk);
+      n -= chunk;
+      /* Advance only when there is more to copy.  */
+      if (n)
+        {
+          GUPCR_PTS_INCR_VADDR (dest, chunk);
+          src = (char *) src + chunk;
+        }
+    }
+  while (n);
+}
+
+/* Set N bytes of shared memory starting at DEST to the value C.
+
+   DEST is translated with __upc_sptr_to_addr, and the fill is
+   carried out in pieces so that no single memset crosses a
+   GUPCR_VM_PAGE_SIZE boundary.  A null DEST is a fatal error.  */
+__attribute__((__always_inline__))
+static inline
+void
+__upc_memset (upc_shared_ptr_t dest, int c, size_t n)
+{
+  if (GUPCR_PTS_IS_NULL (dest))
+    __upc_fatal ("Invalid access via null shared pointer");
+  do
+    {
+      char *const to = (char *) __upc_sptr_to_addr (dest);
+      const size_t to_offset = GUPCR_PTS_OFFSET (dest);
+      const size_t in_page = to_offset & GUPCR_VM_OFFSET_MASK;
+      const size_t chunk = GUPCR_MIN (GUPCR_VM_PAGE_SIZE - in_page, n);
+      memset (to, c, chunk);
+      n -= chunk;
+      /* Advance only when there is more to set.  */
+      if (n)
+        GUPCR_PTS_INCR_VADDR (dest, chunk);
+    }
+  while (n);
+}
+//end lib_inline_mem_sup
+
+#endif /* _UPC_MEM_H_ */
Index: libgupc/smp/upc_nb.upc
===================================================================
--- libgupc/smp/upc_nb.upc	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_nb.upc	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,189 @@
+/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime Library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#include <upc.h>
+#include <upc_nb.h>
+
+/**
+ * Copy memory with non-blocking explicit handle transfer.
+ *
+ * This implementation performs the whole copy with a call to
+ * upc_memcpy() before it returns, so the handle returned is
+ * always UPC_COMPLETE_HANDLE.
+ *
+ * @param[in] dst Destination shared memory pointer
+ * @param[in] src Source shared memory pointer
+ * @param[in] n Number of bytes to transfer
+ * @retval UPC non-blocking transfer handle
+ */
+upc_handle_t
+upc_memcpy_nb (shared void *restrict dst,
+	       shared const void *restrict src, size_t n)
+{
+  upc_memcpy (dst, src, n);
+  return UPC_COMPLETE_HANDLE;
+}
+
+/**
+ * Get memory with non-blocking explicit handle transfer.
+ *
+ * This implementation performs the whole transfer with a call to
+ * upc_memget() before it returns, so the handle returned is
+ * always UPC_COMPLETE_HANDLE.
+ *
+ * @param[in] dst Destination local memory pointer
+ * @param[in] src Source remote memory pointer
+ * @param[in] n Number of bytes to transfer
+ * @retval UPC non-blocking transfer handle
+ */
+upc_handle_t
+upc_memget_nb (void *restrict dst,
+	       shared const void *restrict src, size_t n)
+{
+  upc_memget (dst, src, n);
+  return UPC_COMPLETE_HANDLE;
+}
+
+/**
+ * Put memory with non-blocking explicit handle transfer.
+ *
+ * This implementation performs the whole transfer with a call to
+ * upc_memput() before it returns, so the handle returned is
+ * always UPC_COMPLETE_HANDLE.
+ *
+ * @param[in] dst Destination remote memory pointer
+ * @param[in] src Source local memory pointer
+ * @param[in] n Number of bytes to transfer
+ * @retval UPC non-blocking transfer handle
+ */
+upc_handle_t
+upc_memput_nb (shared void *restrict dst,
+	       const void *restrict src, size_t n)
+{
+  upc_memput (dst, src, n);
+  return UPC_COMPLETE_HANDLE;
+}
+
+/**
+ * Set memory with non-blocking explicit handle transfer.
+ *
+ * This implementation performs the whole operation with a call to
+ * upc_memset() before it returns, so the handle returned is
+ * always UPC_COMPLETE_HANDLE.
+ *
+ * @param[in] dst Shared remote pointer
+ * @param[in] c Value for set operation
+ * @param[in] n Number of bytes to set
+ * @retval UPC non-blocking transfer handle
+ */
+upc_handle_t
+upc_memset_nb (shared void *dst, int c, size_t n)
+{
+  upc_memset (dst, c, n);
+  return UPC_COMPLETE_HANDLE;
+}
+
+/**
+ * Explicit handle non-blocking transfer sync attempt.
+ *
+ * This implementation ignores the handle and always reports
+ * completion.
+ *
+ * @param[in] handle Transfer explicit handle
+ * @retval UPC_NB_COMPLETED returned if transfer completed,
+ *	   otherwise UPC_NB_NOT_COMPLETED
+ */
+int
+upc_sync_attempt (upc_handle_t ARG_UNUSED(handle))
+{
+  return UPC_NB_COMPLETED;
+}
+
+/**
+ * Explicit handle non-blocking transfer sync.
+ *
+ * This implementation ignores the handle and does nothing.
+ *
+ * @param[in] handle Non-blocking transfer explicit handle
+ */
+void
+upc_sync (upc_handle_t ARG_UNUSED(handle))
+{
+}
+
+/**
+ * Copy memory with non-blocking implicit handle transfer.
+ *
+ * This implementation performs the whole copy with a call to
+ * upc_memcpy() before it returns.
+ *
+ * @param[in] dst Shared remote memory pointer
+ * @param[in] src Shared remote memory pointer
+ * @param[in] n Number of bytes to transfer
+ */
+void
+upc_memcpy_nbi (shared void *restrict dst,
+		shared const void *restrict src, size_t n)
+{
+  upc_memcpy (dst, src, n);
+}
+
+/**
+ * Get memory with non-blocking implicit handle transfer.
+ *
+ * This implementation performs the whole transfer with a call to
+ * upc_memget() before it returns.
+ *
+ * @param[in] dst Local memory pointer
+ * @param[in] src Shared remote memory pointer
+ * @param[in] n Number of bytes to transfer
+ */
+void
+upc_memget_nbi (void *restrict dst,
+		shared const void *restrict src, size_t n)
+{
+  upc_memget (dst, src, n);
+}
+
+/**
+ * Put memory with non-blocking implicit handle transfer.
+ *
+ * This implementation performs the whole transfer with a call to
+ * upc_memput() before it returns.
+ *
+ * @param[in] dst Shared remote memory pointer
+ * @param[in] src Local memory pointer
+ * @param[in] n Number of bytes to transfer
+ */
+void
+upc_memput_nbi (shared void *restrict dst,
+		const void *restrict src, size_t n)
+{
+  upc_memput (dst, src, n);
+}
+
+/**
+ * Set memory with non-blocking implicit handle transfer.
+ *
+ * This implementation performs the whole operation with a call to
+ * upc_memset() before it returns.
+ *
+ * @param[in] dst Shared remote pointer
+ * @param[in] c Value for set operation
+ * @param[in] n Number of bytes to set
+ */
+void
+upc_memset_nbi (shared void *dst, int c, size_t n)
+{
+  upc_memset (dst, c, n);
+}
+
+/**
+ * Check on implicit handle non-blocking transfers.
+ *
+ * This implementation always reports completion.
+ *
+ * @retval UPC_NB_COMPLETED if no transfers pending, otherwise
+ *         UPC_NB_NOT_COMPLETED is returned
+ */
+int
+upc_synci_attempt (void)
+{
+  return UPC_NB_COMPLETED;
+}
+
+/**
+ * Complete implicit handle non-blocking transfers.
+ *
+ * This implementation does nothing.
+ */
+void
+upc_synci (void)
+{
+}
Index: libgupc/smp/upc_numa.c
===================================================================
--- libgupc/smp/upc_numa.c	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_numa.c	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,125 @@
+/* Copyright (C) 2008-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+
+#include <numa.h>
+#include "upc_config.h"
+#include "upc_sysdep.h"
+#include "upc_defs.h"
+#include "upc_sup.h"
+#include "upc_affinity.h"
+
+/* Report whether NUMA support is available.  This implementation,
+   which is built on <numa.h>, always returns 1.  */
+int
+__upc_numa_supported (void)
+{
+  return 1;
+}
+
+/* Record the number of NUMA nodes in U->num_nodes, computed as
+   one more than the value returned by numa_max_node().
+   ERR_MSG is unused.  Always returns 1.  */
+int
+__upc_numa_init (const upc_info_p u, const char **ARG_UNUSED (err_msg))
+{
+  const int highest_node = numa_max_node ();
+  u->num_nodes = highest_node + 1;
+  return 1;
+}
+
+/* Choose the NUMA node for thread THREAD_ID by distributing threads
+   over the U->num_nodes nodes in round-robin order.  The chosen
+   node is stored in both *SCHED_AFFINITY and *MEM_AFFINITY.
+   ERR_MSG is unused.  Always returns 1.  */
+int
+__upc_numa_allocate (const upc_info_p u, const int thread_id,
+		     int *sched_affinity, int *mem_affinity,
+		     const char **ARG_UNUSED (err_msg))
+{
+  const int node = thread_id % u->num_nodes;
+  *sched_affinity = node;
+  *mem_affinity = node;
+  return 1;
+}
+
+/* Set node scheduling policy.
+
+   Restricts the caller (pid argument 0) to the CPUs of the NUMA node
+   recorded in U->thread_info[THREAD_ID].sched_affinity.  Failure to
+   set the affinity is a fatal error.  */
+
+void
+__upc_numa_sched_set (const upc_info_p u, const int thread_id)
+{
+#if defined(LIBNUMA_API_VERSION) && (LIBNUMA_API_VERSION==2)
+  struct bitmask *set = numa_allocate_cpumask();
+  numa_node_to_cpus (u->thread_info[thread_id].sched_affinity, set);
+  if (numa_sched_setaffinity (0, set))
+    {
+      __upc_fatal ("Scheduling cannot be set");
+    }
+  /* The CPU mask was allocated by libnuma above; release it now
+     that the affinity has been applied.  */
+  numa_free_cpumask (set);
+#else
+  cpu_set_t set;
+  CPU_ZERO (&set);
+  numa_node_to_cpus (u->thread_info[thread_id].sched_affinity,
+		     (unsigned long *) &set, sizeof (set));
+  if (sched_setaffinity (0, sizeof (set), &set))
+    {
+      __upc_fatal ("Scheduling cannot be set");
+    }
+#endif
+}
+
+/* Set memory allocation policy.
+
+   With GUPCR_MEM_POLICY_NODE the node recorded in
+   U->thread_info[THREAD_ID].mem_affinity becomes the preferred
+   node; with GUPCR_MEM_POLICY_STRICT memory is bound to that
+   node.  Any other policy leaves the settings untouched.  */
+
+void
+__upc_numa_memory_affinity_set (const upc_info_p u, const int thread_id)
+{
+  if (u->mem_policy == GUPCR_MEM_POLICY_NODE)
+    {
+      numa_set_preferred (u->thread_info[thread_id].mem_affinity);
+      return;
+    }
+  if (u->mem_policy == GUPCR_MEM_POLICY_STRICT)
+    {
+#if defined(LIBNUMA_API_VERSION) && (LIBNUMA_API_VERSION==2)
+      struct bitmask *bind_mask = numa_bitmask_alloc (u->num_nodes);
+      numa_bitmask_setbit (bind_mask,
+			   u->thread_info[thread_id].mem_affinity);
+      numa_set_membind (bind_mask);
+      numa_bitmask_free (bind_mask);
+#else
+      nodemask_t bind_mask;
+      nodemask_zero (&bind_mask);
+      nodemask_set (&bind_mask, u->thread_info[thread_id].mem_affinity);
+      numa_set_membind (&bind_mask);
+#endif
+    }
+}
+
+/* Set affinity for memory region.
+
+   The SIZE bytes at REGION are placed on the node recorded in
+   U->thread_info[THREAD_ID].mem_affinity, but only when neither the
+   scheduling policy nor the memory policy is "auto" and THREAD_ID
+   is the calling thread.  */
+
+void
+__upc_numa_memory_region_affinity_set (const upc_info_p u,
+				       const int thread_id,
+				       const void *region, const size_t size)
+{
+  if (u->sched_policy == GUPCR_SCHED_POLICY_AUTO)
+    return;
+  if (u->mem_policy == GUPCR_MEM_POLICY_AUTO)
+    return;
+  /* Memory is being allocated with affinity to "thread";
+     only that thread itself acts on the request.  */
+  if (thread_id != MYTHREAD)
+    return;
+  numa_tonode_memory ((void *) region, size,
+		      u->thread_info[thread_id].mem_affinity);
+}
Index: libgupc/smp/upc_numa.h
===================================================================
--- libgupc/smp/upc_numa.h	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_numa.h	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,44 @@
+/* Copyright (C) 2008-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+
+#ifndef _UPC_NUMA_H_
+#define _UPC_NUMA_H_
+
+/* Return non-zero if NUMA support is available.  */
+extern int __upc_numa_supported (void);
+/* Initialize NUMA support.  Returns non-zero on success.  */
+extern int __upc_numa_init (const upc_info_p,
+                            const char **err_msg);
+/* Choose the scheduling and memory affinity for thread THREAD_ID.
+   Returns non-zero on success; on failure, zero is returned and
+   *ERR_MSG describes the problem.  */
+extern int __upc_numa_allocate (const upc_info_p u, const int thread_id,
+				int *sched_affinity, int *mem_affinity,
+				const char **err_msg);
+/* Apply the scheduling affinity recorded for the given thread.  */
+extern void __upc_numa_sched_set (const upc_info_p, const int);
+/* Apply the memory allocation policy for the given thread.  */
+extern void __upc_numa_memory_affinity_set (const upc_info_p, const int);
+/* Apply memory affinity to the SIZE bytes at REGION on behalf of
+   thread THREAD_ID.  */
+extern void __upc_numa_memory_region_affinity_set (const upc_info_p u,
+						   const int thread_id,
+						   const void *region,
+						   const size_t size);
+
+#endif /* !_UPC_NUMA_H_ */
Index: libgupc/smp/upc_numa_stub.c
===================================================================
--- libgupc/smp/upc_numa_stub.c	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_numa_stub.c	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,75 @@
+/* Copyright (C) 2008-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+
+#include "upc_config.h"
+#include "upc_sysdep.h"
+#include "upc_defs.h"
+#include "upc_numa.h"
+
+/* Stub: report that NUMA support is not available (always 0).  */
+int
+__upc_numa_supported (void)
+{
+  return 0;
+}
+
+/* Stub: there is nothing to initialize.  Both arguments are
+   unused; always returns 1.  */
+int
+__upc_numa_init (const upc_info_p ARG_UNUSED (u),
+                 const char **ARG_UNUSED (err_msg))
+{
+  return 1;
+}
+
+/* Stub: scheduling over NUMA nodes cannot be done.  Stores an
+   explanatory message in *ERR_MSG and returns 0; the other
+   arguments are unused and the affinity outputs are not written.  */
+int
+__upc_numa_allocate (const upc_info_p ARG_UNUSED (u),
+                     const int ARG_UNUSED (thread_id),
+		     int *ARG_UNUSED (sched_affinity),
+		     int *ARG_UNUSED (mem_affinity),
+		     const char **err_msg)
+{
+  *err_msg = "UPC error: unable to schedule over nodes - NUMA not available.";
+  return 0;
+}
+
+/* Stub: does nothing; both arguments are unused.  */
+void
+__upc_numa_sched_set (const upc_info_p ARG_UNUSED (u),
+                      const int ARG_UNUSED (thread_id))
+{
+}
+
+/* Stub: does nothing; both arguments are unused.  */
+void
+__upc_numa_memory_affinity_set (const upc_info_p ARG_UNUSED (u),
+                                const int ARG_UNUSED (thread_id))
+{
+}
+
+/* Stub: does nothing; all arguments are unused.  */
+void
+__upc_numa_memory_region_affinity_set (const upc_info_p ARG_UNUSED (u),
+				       const int ARG_UNUSED (thread_id),
+				       const void *ARG_UNUSED (region),
+				       const size_t ARG_UNUSED (size))
+{
+}
Index: libgupc/smp/upc_pgm_info.c
===================================================================
--- libgupc/smp/upc_pgm_info.c	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_pgm_info.c	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,303 @@
+/* Copyright (C) 2004-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+
+#include "upc_config.h"
+#include "upc_sysdep.h"
+#include "upc_defs.h"
+#include "upc_sup.h"
+
+
+typedef enum  /* Threads model recorded for one compiled UPC file.  */
+  {
+    upc_threads_model_none         = 0,  /* No model keyword was parsed.  */
+    upc_threads_model_process      = 1,  /* "process" keyword.  */
+    upc_threads_model_pthreads_tls = 2   /* "pthreads-tls" keyword.  */
+  } upc_threads_model_t;
+
+typedef struct upc_compiled_thread_info_struct
+  {  /* One list node per parsed "$GCCUPCConfig:" string.  */
+    struct upc_compiled_thread_info_struct *next;  /* Next node, or 0.  */
+    char *filename;  /* malloc'd copy of the name found inside "(...)".  */
+    int nthreads;  /* "staticthreads=" value, or -1 if none was given.  */
+    int npthreads;  /* "staticpthreads=" value, or -1 if none was given.  */
+    upc_threads_model_t threads_model;
+  } upc_compiled_thread_info_t;
+typedef upc_compiled_thread_info_t *upc_compiled_thread_info_p;
+
+/* List of compiled UPC files, and the value of THREADS (and pthreads
+   per process) specified at compile-time (-1 means no value given
+   at compile-time). */
+static upc_compiled_thread_info_p __upc_compiled_thread_info = 0;
+
+/* Print to stderr a table with one row per registered UPC source file:
+   its compiled THREADS value, threads model, PTHREADS value and file
+   name.  A non-positive count is shown as "<dynamic>".  Rows appear in
+   list order.  */
+static
+void
+__upc_print_upc_compiled_thread_info (void)
+{
+  upc_compiled_thread_info_p p;
+
+  fprintf (stderr, "   THREADS   Threads Model  PTHREADS Filename\n");
+  for (p = __upc_compiled_thread_info; p; p = p->next)
+    {
+      /* THREADS column (10 characters wide).  */
+      if (p->nthreads > 0)
+        fprintf (stderr, "%10d", p->nthreads);
+      else
+        fprintf (stderr, " <dynamic>");
+      /* Threads Model column (16 characters wide).  It is always emitted,
+         so the columns after it stay aligned when no model was recorded.  */
+      if (p->threads_model == upc_threads_model_process)
+        fprintf (stderr, "         process");
+      else if (p->threads_model == upc_threads_model_pthreads_tls)
+        fprintf (stderr, "    pthreads-tls");
+      else
+        fprintf (stderr, "          <none>");
+      /* PTHREADS column (10 characters wide).  */
+      if (p->npthreads > 0)
+        fprintf (stderr, "%10d", p->npthreads);
+      else
+        fprintf (stderr, " <dynamic>");
+      /* File name, then end of row.  */
+      fprintf (stderr, " %s\n", p->filename);
+    }
+}
+
+/* Insert a record for FILENAME (stored, not copied) into the sorted list.  */
+static void
+__upc_register_pgm_info (char *filename, int nthreads,
+                   upc_threads_model_t threads_model, int npthreads)
+{
+   upc_compiled_thread_info_p *p = &__upc_compiled_thread_info;
+   upc_compiled_thread_info_p info = malloc (sizeof (*info));
+   if (!info)
+     { perror ("malloc"); exit (2); }
+   /* Find the insertion point; equal names go after existing ones.  */
+   while (*p && strcmp (filename, (*p)->filename) >= 0)
+     p = &(*p)->next;
+   info->filename      = filename;
+   info->nthreads      = nthreads;
+   info->threads_model = threads_model;
+   info->npthreads     = npthreads;
+   info->next = *p;
+   *p = info;
+}
+
+static  /* Advance *S past any run of ' ' characters (blanks only).  */
+void
+__upc_skip_spaces (const char **s)
+{
+  while (**s == ' ') ++(*s);
+}
+
+static  /* If *S begins with STRING, step *S past it and return 1.  */
+int
+__upc_match_string (const char **s, const char *string)
+{
+  int slen = strlen(string);
+  if (strncmp(*s, string, slen) != 0)
+    return 0;  /* No match: *S is left unchanged.  */
+  *s += slen;
+  return 1;
+}
+
+static  /* Scan *S forward to the first occurrence of STRING.  */
+int
+__upc_match_until (const char **s, const char *string)
+{
+  int slen = strlen (string);
+  while (**s && (strncmp(*s, string, slen) != 0)) ++(*s);
+  if (!**s)
+    return 0;  /* Hit the terminating NUL: *S is left pointing at it.  */
+  *s += slen;  /* Found: leave *S just past the matched STRING.  */
+  return 1;
+}
+
+static  /* Parse unsigned decimal digits at *S into *NUM.  */
+int
+__upc_match_num (const char **s, int *num)
+{
+  *num = 0;
+  while (**s >= '0' && **s <= '9')  /* Accumulate digits, advancing *S.  */
+    {
+      *num = *num * 10 + (**s - '0');
+      ++(*s);
+    }
+  if (*num == 0)  /* No digits, or a value of zero: report no match.  */
+    return 0;
+  return 1;
+}
+
+/* Examples:
+ $GCCUPCConfig: (t.upc) dynamicthreads process$
+ $GCCUPCConfig: (t.upc) staticthreads=4 pthreads-tls staticpthreads=4$ */
+/* Parse one "$GCCUPCConfig: (FILE) KEYWORD...$" string found at INFO and,
+   if it is well formed, register FILE together with its compiled
+   settings.  Recognized keywords: dynamicthreads, staticthreads=N,
+   process, pthreads-tls, dynamicpthreads and staticpthreads=N.
+   A string that does not start with the tag, or that contains an
+   unrecognized keyword or a malformed number, is silently ignored.  */
+static void
+__upc_parse_program_info (char *info)
+{
+  char *filename;
+  int nthreads = -1;
+  upc_threads_model_t threads_model = upc_threads_model_none;
+  int npthreads = -1;
+  const char *fname;
+  int fname_len;
+  const char *s = info;
+  if (!__upc_match_string (&s, "$GCCUPCConfig:"))
+    return;
+  __upc_skip_spaces (&s);
+  if (!__upc_match_string (&s, "("))
+    return;
+  fname = s;
+  if (!__upc_match_until (&s, ")"))
+    return;
+  /* S now points just past the ')', so exclude it from the length.  */
+  fname_len = (s - fname - 1);
+  while (*s)
+    {
+      __upc_skip_spaces (&s);
+      if (__upc_match_string (&s, "$"))
+        break;
+      else if (__upc_match_string (&s, "dynamicthreads"))
+        nthreads = -1;
+      else if (__upc_match_string (&s, "staticthreads="))
+        {
+          if (!__upc_match_num (&s, &nthreads))
+            return;
+        }
+      else if (__upc_match_string (&s, "process"))
+        threads_model = upc_threads_model_process;
+      else if (__upc_match_string (&s, "pthreads-tls"))
+        threads_model = upc_threads_model_pthreads_tls;
+      else if (__upc_match_string (&s, "dynamicpthreads"))
+        npthreads = -1;
+      else if (__upc_match_string (&s, "staticpthreads="))
+        {
+          if (!__upc_match_num (&s, &npthreads))
+            return;
+        }
+      else
+        return;
+    }
+  /* Copy the file name only after the whole string has parsed, so that
+     the early returns above cannot leak the allocation.  */
+  filename = malloc (fname_len + 1);
+  if (!filename)
+    { perror ("malloc"); exit (2); }
+  memcpy (filename, fname, fname_len);
+  filename[fname_len] = '\0';
+  __upc_register_pgm_info (filename, nthreads, threads_model, npthreads);
+}
+
+void  /* Collect, cross-check and apply the compiled UPC settings.  */
+__upc_validate_pgm_info (char *pgm)
+{  /* PGM is used only as the prefix of the diagnostics printed below.  */
+   upc_compiled_thread_info_p p;
+   char *info;
+   int nthreads = -1;
+   int npthreads = -1;
+   /* Parse every non-empty string in the program information section;
+      the null bytes between the strings are skipped.  */
+   for (info = GUPCR_PGM_INFO_SECTION_START;
+        info < GUPCR_PGM_INFO_SECTION_END;
+	++info)
+     {
+       if (*info)
+         {
+	   __upc_parse_program_info (info);
+	   info += strlen(info);  /* The loop's ++info steps over the NUL.  */
+         }
+     }
+   if (!__upc_compiled_thread_info)
+     {
+       fprintf (stderr, "%s: UPC Warning: There are no UPC source files"
+			" compiled into this program,"
+			" or <upc.h> was not included?\n", pgm);
+       return;  /* Warning only: THREADS is not assigned on this path.  */
+     }
+   for (p = __upc_compiled_thread_info; p; p = p->next)
+     {
+        if (p->nthreads > 0 && nthreads <= 0)
+	  nthreads = p->nthreads;  /* First static THREADS value seen.  */
+        if (p->npthreads > 0 && npthreads <= 0)
+	  npthreads = p->npthreads;  /* First static pthreads value seen.  */
+        /* Static thread/pthread compilations can be intermixed
+	   with dynamic threads compilations, but static values must agree.
+	   The threads model must match that of the first list entry.  */
+        if (((p->nthreads != nthreads)
+	     && (p->nthreads > 0)
+	     && (nthreads > 0))
+	    || ((p->npthreads != npthreads)
+	     && (p->npthreads > 0)
+	     && (npthreads > 0))
+	    || (p->threads_model != __upc_compiled_thread_info->threads_model))
+	  {
+	    fprintf (stderr, "%s: UPC error: The UPC source files in this"
+			     " program were not compiled with the same value"
+			     " of UPC settings.\n", pgm);
+	    fprintf (stderr, "%s: A list of each UPC source file and"
+			     " its compiled UPC settings follows.\n", pgm);
+	    __upc_print_upc_compiled_thread_info ();
+	    exit (2);
+	  }
+     }
+
+#ifndef GUPCR_USE_PTHREADS
+  if (__upc_compiled_thread_info->threads_model != upc_threads_model_process)
+    {
+      fprintf (stderr, "%s: The selected GUPC runtime library"
+                       " supports only the process model."
+		       " Did you link with the correct runtime library?\n",
+		       pgm);
+      exit (2);
+    }
+#else
+  if (__upc_compiled_thread_info->threads_model != upc_threads_model_pthreads_tls)
+    {
+      fprintf (stderr, "%s: The selected GUPC runtime library"
+                       " supports only the POSIX threads model."
+		       " Did you link with the correct runtime library?\n",
+		       pgm);
+      exit (2);
+    }
+#endif /* !GUPCR_USE_PTHREADS */
+
+  THREADS = nthreads;  /* Still -1 if no file gave a static value.  */
+#ifdef GUPCR_USE_PTHREADS
+  UPC_PTHREADS = npthreads;
+  if (UPC_PTHREADS == -1)
+    {
+      UPC_PTHREADS = THREADS;  /* No static pthreads count: use THREADS.  */
+    }
+#endif /* GUPCR_USE_PTHREADS */
+
+}
Index: libgupc/smp/upc_pts.h
===================================================================
--- libgupc/smp/upc_pts.h	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_pts.h	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,175 @@
+/* Copyright (C) 2008-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+
+#ifndef _UPC_PTS_H_
+#define _UPC_PTS_H_ 1
+
+//begin lib_pts_defs
+
+/* UPC pointer representation */
+
+#if (defined(GUPCR_PTS_STRUCT_REP) + defined(GUPCR_PTS_WORD_PAIR_REP) \
+     + defined(GUPCR_PTS_PACKED_REP)) == 0
+# error Unknown PTS representation.
+#elif (defined(GUPCR_PTS_STRUCT_REP) + defined(GUPCR_PTS_WORD_PAIR_REP) \
+     + defined(GUPCR_PTS_PACKED_REP)) != 1
+# error Only one UPC shared pointer representation setting is permitted.
+#endif
+
+#ifdef GUPCR_PTS_STRUCT_REP
+
+#if GUPCR_PTS_THREAD_SIZE == 32
+#undef GUPCR_PTS_THREAD_TYPE
+#define GUPCR_PTS_THREAD_TYPE u_intSI_t
+#elif GUPCR_PTS_THREAD_SIZE == 16
+#undef GUPCR_PTS_THREAD_TYPE
+#define GUPCR_PTS_THREAD_TYPE u_intHI_t
+#endif
+#if GUPCR_PTS_PHASE_SIZE == 32
+#undef GUPCR_PTS_PHASE_TYPE
+#define GUPCR_PTS_PHASE_TYPE u_intSI_t
+#elif GUPCR_PTS_PHASE_SIZE == 16
+#undef GUPCR_PTS_PHASE_TYPE
+#define GUPCR_PTS_PHASE_TYPE u_intHI_t
+#endif
+
+#if !__GCC_UPC__
+/* The UPC compiler pre-defines upc_shared_ptr_t to be the
+   representation of a shared pointer.  Since most of the
+   runtime is written in regular "C", we need to define
+   the pointer representation here.  */
+typedef struct shared_ptr_struct
+  {
+#if GUPCR_PTS_VADDR_FIRST
+    GUPCR_PTS_VADDR_TYPE  vaddr;
+    GUPCR_PTS_THREAD_TYPE thread;
+    GUPCR_PTS_PHASE_TYPE  phase;
+#else
+    GUPCR_PTS_PHASE_TYPE  phase;
+    GUPCR_PTS_THREAD_TYPE thread;
+    GUPCR_PTS_VADDR_TYPE  vaddr;
+#endif
+  } upc_shared_ptr_t
+#ifdef GUPCR_PTS_ALIGN
+  __attribute__ ((aligned (GUPCR_PTS_ALIGN)))
+#endif
+  ;
+typedef upc_shared_ptr_t *upc_shared_ptr_p;
+/* upc_dbg_shared_ptr_t is used by debugger to figure out
+   shared pointer layout */
+typedef upc_shared_ptr_t upc_dbg_shared_ptr_t;
+#endif
+
+#define GUPCR_PTS_TO_REP(V) *((upc_shared_ptr_t *)&(V)) 
+#define GUPCR_PTS_IS_NULL(P) (!(P).vaddr && !(P).thread && !(P).phase)
+#define GUPCR_PTS_SET_NULL_SHARED(P) \
+   {(P).vaddr = 0; (P).thread = 0; (P).phase = 0;}
+
+#define GUPCR_PTS_VADDR(P) ((size_t)(P).vaddr - (size_t)GUPCR_SHARED_SECTION_START)
+#define GUPCR_PTS_OFFSET(P) ((size_t)(P).vaddr - (size_t)GUPCR_SHARED_SECTION_START)
+#define GUPCR_PTS_THREAD(P) (P).thread
+#define GUPCR_PTS_PHASE(P) (P).phase
+
+#define GUPCR_PTS_SET_VADDR(P,V) (P).vaddr = (GUPCR_PTS_VADDR_TYPE)((char *)(V) \
+			+ (size_t)GUPCR_SHARED_SECTION_START)
+#define GUPCR_PTS_INCR_VADDR(P,V) (P).vaddr += ((size_t)(V))
+#define GUPCR_PTS_SET_THREAD(P,V) (P).thread = (size_t)(V)
+#define GUPCR_PTS_SET_PHASE(P,V) (P).phase = (size_t)(V)
+
+#elif GUPCR_PTS_PACKED_REP
+
+#if GUPCR_PTS_VADDR_FIRST
+#define GUPCR_PTS_VADDR_SHIFT	(GUPCR_PTS_THREAD_SHIFT + GUPCR_PTS_THREAD_SIZE)
+#define GUPCR_PTS_THREAD_SHIFT	GUPCR_PTS_PHASE_SIZE
+#define GUPCR_PTS_PHASE_SHIFT	0
+#else
+#define GUPCR_PTS_VADDR_SHIFT   0
+#define GUPCR_PTS_THREAD_SHIFT  GUPCR_PTS_VADDR_SIZE
+#define GUPCR_PTS_PHASE_SHIFT   (GUPCR_PTS_THREAD_SHIFT + GUPCR_PTS_THREAD_SIZE)
+#endif
+#define GUPCR_PTS_TO_REP(V) *((upc_shared_ptr_t *)&(V)) 
+#if GUPCR_TARGET64
+#define GUPCR_ONE 1UL
+#define GUPCR_PTS_REP_T unsigned long
+#else
+#define GUPCR_ONE 1ULL
+#define GUPCR_PTS_REP_T unsigned long long
+#endif
+#define GUPCR_PTS_VADDR_MASK	((GUPCR_ONE << GUPCR_PTS_VADDR_SIZE) - GUPCR_ONE)
+#define GUPCR_PTS_THREAD_MASK	((GUPCR_ONE << GUPCR_PTS_THREAD_SIZE) - GUPCR_ONE)
+#define GUPCR_PTS_PHASE_MASK	((GUPCR_ONE << GUPCR_PTS_PHASE_SIZE) - GUPCR_ONE)
+
+#if !__GCC_UPC__
+/* upc_dbg_shared_ptr_t is used by debugger to figure out
+   shared pointer layout */
+typedef struct shared_ptr_struct
+  {
+#if GUPCR_PTS_VADDR_FIRST
+    unsigned long long vaddr:GUPCR_PTS_VADDR_SIZE;
+    unsigned int thread:GUPCR_PTS_THREAD_SIZE;
+    unsigned int phase:GUPCR_PTS_PHASE_SIZE;
+#else
+    unsigned int phase:GUPCR_PTS_PHASE_SIZE;
+    unsigned int thread:GUPCR_PTS_THREAD_SIZE;
+    unsigned long long vaddr:GUPCR_PTS_VADDR_SIZE;
+#endif
+  } upc_dbg_shared_ptr_t;
+
+typedef GUPCR_PTS_REP_T upc_shared_ptr_t;
+typedef upc_shared_ptr_t *upc_shared_ptr_p;
+#endif
+
+#define GUPCR_PTS_IS_NULL(P) (!(P))
+#define GUPCR_PTS_SET_NULL_SHARED(P) { (P) = 0; }
+
+/* Field accessors for the packed representation.  The value is
+   unsigned, so right shift is logical (not arithmetic).  Every accessor
+   shifts and masks, so either field order (GUPCR_PTS_VADDR_FIRST or
+   not) is handled, and every expansion is fully parenthesized.
+   For the GUPCR_PTS_SET_* macros the value being inserted must fit
+   into the field.  It will not be masked.  */
+#define GUPCR_PTS_VADDR(P)  \
+  ((void *)((size_t)((P)>>GUPCR_PTS_VADDR_SHIFT & GUPCR_PTS_VADDR_MASK)))
+#define GUPCR_PTS_THREAD(P) ((size_t)((P)>>GUPCR_PTS_THREAD_SHIFT & GUPCR_PTS_THREAD_MASK))
+#define GUPCR_PTS_PHASE(P)  ((size_t)((P)>>GUPCR_PTS_PHASE_SHIFT & GUPCR_PTS_PHASE_MASK))
+#define GUPCR_PTS_OFFSET(P) ((size_t)((P)>>GUPCR_PTS_VADDR_SHIFT & GUPCR_PTS_VADDR_MASK))
+
+#define GUPCR_PTS_SET_VADDR(P,V) \
+  ((P) = ((P) & ~(GUPCR_PTS_VADDR_MASK << GUPCR_PTS_VADDR_SHIFT)) \
+         | ((GUPCR_PTS_REP_T)(V) << GUPCR_PTS_VADDR_SHIFT))
+#define GUPCR_PTS_SET_THREAD(P,V) ((P) = ((P) & ~(GUPCR_PTS_THREAD_MASK << GUPCR_PTS_THREAD_SHIFT)) \
+                                     | ((GUPCR_PTS_REP_T)(V) << GUPCR_PTS_THREAD_SHIFT))
+#define GUPCR_PTS_SET_PHASE(P,V) ((P) = ((P) & ~(GUPCR_PTS_PHASE_MASK << GUPCR_PTS_PHASE_SHIFT)) \
+                                     | ((GUPCR_PTS_REP_T)(V) << GUPCR_PTS_PHASE_SHIFT))
+#define GUPCR_PTS_INCR_VADDR(P,V) \
+  ((P) += ((GUPCR_PTS_REP_T)(V) << GUPCR_PTS_VADDR_SHIFT))
+#elif GUPCR_PTS_WORD_PAIR_REP
+#error UPC word pair representation is unsupported.
+#endif /* GUPCR_PTS_*_REP__ */
+//end lib_pts_defs
+
+#endif /* !_UPC_PTS_H_ */
Index: libgupc/smp/upc_pupc.c
===================================================================
--- libgupc/smp/upc_pupc.c	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_pupc.c	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,111 @@
+/* Copyright (C) 2009-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+
+#include "gasp.h"
+#include "upc_config.h"
+#include "upc_sysdep.h"
+#include "upc_defs.h"
+#include "upc_pupc.h"
+
+static GUPCR_THREAD_LOCAL gasp_context_t __upc_gasp_ctx;
+
+int
+pupc_control (int on)
+{  /* Forwards to gasp_control using this file's __upc_gasp_ctx.  */
+  return gasp_control (__upc_gasp_ctx, on);
+}
+
+unsigned int
+pupc_create_event (const char *name, const char *desc)
+{  /* Forwards to gasp_create_event using this file's __upc_gasp_ctx.  */
+  return gasp_create_event (__upc_gasp_ctx, name, desc);
+}
+
+void
+pupc_event_start (unsigned int evttag, ...)
+{  /* Reports GASP_START for EVTTAG with a NULL file name and line 0.  */
+  va_list argptr;  /* The optional arguments that follow EVTTAG.  */
+  va_start (argptr, evttag);
+  gasp_event_notifyVA (__upc_gasp_ctx, evttag, GASP_START, NULL, 0, 0,
+		       argptr);
+  va_end (argptr);
+}
+
+void
+pupc_event_end (unsigned int evttag, ...)
+{  /* Reports GASP_END for EVTTAG with a NULL file name and line 0.  */
+  va_list argptr;  /* The optional arguments that follow EVTTAG.  */
+  va_start (argptr, evttag);
+  gasp_event_notifyVA (__upc_gasp_ctx, evttag, GASP_END, NULL, 0, 0, argptr);
+  va_end (argptr);
+}
+
+void
+pupc_event_atomic (unsigned int evttag, ...)
+{  /* Reports GASP_ATOMIC for EVTTAG with a NULL file name and line 0.  */
+  va_list argptr;  /* The optional arguments that follow EVTTAG.  */
+  va_start (argptr, evttag);
+  gasp_event_notifyVA (__upc_gasp_ctx, evttag, GASP_ATOMIC, NULL, 0, 0,
+		       argptr);
+  va_end (argptr);
+}
+
+void
+pupc_event_startg (unsigned int evttag, const char *filename, int linenum, ...)
+{  /* Reports GASP_START for EVTTAG, passing on FILENAME and LINENUM.  */
+  va_list argptr;  /* The optional arguments that follow LINENUM.  */
+  va_start (argptr, linenum);
+  gasp_event_notifyVA (__upc_gasp_ctx, evttag, GASP_START, filename, linenum, 0,
+		       argptr);
+  va_end (argptr);
+}
+
+void
+pupc_event_endg (unsigned int evttag, const char *filename, int linenum, ...)
+{  /* Reports GASP_END for EVTTAG, passing on FILENAME and LINENUM.  */
+  va_list argptr;  /* The optional arguments that follow LINENUM.  */
+  va_start (argptr, linenum);
+  gasp_event_notifyVA (__upc_gasp_ctx, evttag, GASP_END, filename, linenum, 0,
+		       argptr);
+  va_end (argptr);
+}
+
+void
+pupc_event_atomicg (unsigned int evttag, const char *filename, int linenum, ...)
+{  /* Reports GASP_ATOMIC for EVTTAG, passing on FILENAME and LINENUM.  */
+  va_list argptr;  /* The optional arguments that follow LINENUM.  */
+  va_start (argptr, linenum);
+  gasp_event_notifyVA (__upc_gasp_ctx, evttag, GASP_ATOMIC,
+		       filename, linenum, 0, argptr);
+  va_end (argptr);
+}
+
+void
+__upc_pupc_init (int *argc, char ***argv)
+{  /* Sets __upc_gasp_ctx, which every pupc_* wrapper in this file uses.  */
+  __upc_gasp_ctx =  gasp_init (GASP_MODEL_UPC, argc, argv);
+}
Index: libgupc/smp/upc_pupc.h
===================================================================
--- libgupc/smp/upc_pupc.h	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_pupc.h	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,57 @@
+/* Copyright (C) 2009-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+
+#ifndef _UPC_PUPC_H_
+#define _UPC_PUPC_H_
+
+/* See GASP Specification (version 1.5)
+   http://gasp.hcs.ufl.edu/gasp-1.5-61606.pdf  */
+
+extern int pupc_control (int on);
+extern unsigned int pupc_create_event (const char *name, const char *desc);
+
+extern void pupc_event_start (unsigned int evttag, ...);
+extern void pupc_event_end (unsigned int evttag, ...);
+extern void pupc_event_atomic (unsigned int evttag, ...);
+
+extern void pupc_event_startg (unsigned int evttag, const char *file, int line, ...);
+extern void pupc_event_endg (unsigned int evttag, const char *file, int line, ...);
+extern void pupc_event_atomicg (unsigned int evttag, const char *file, int line, ...);
+
+extern void __upc_pupc_init (int *, char ***);
+
+/* The "##__VA_ARGS__" syntax below is required to support an empty optional argument;
+   see: http://gcc.gnu.org/onlinedocs/cpp/Variadic-Macros.html  */
+#define p_start(evttag, ...)  pupc_event_startg (evttag, filename, linenum, ##__VA_ARGS__)
+#define p_end(evttag, ...)    pupc_event_endg (evttag, filename, linenum, ##__VA_ARGS__)
+#define p_atomic(evttag, ...) pupc_event_atomicg (evttag, filename, linenum, ##__VA_ARGS__)
+
+#define p_startx(evttag, ...)  pupc_event_startg (evttag, NULL, 0, ##__VA_ARGS__)
+#define p_endx(evttag, ...)    pupc_event_endg (evttag, NULL, 0, ##__VA_ARGS__)
+#define p_atomicx(evttag, ...) pupc_event_atomicg (evttag, NULL, 0, ##__VA_ARGS__)
+
+#endif /* _UPC_PUPC_H_ */
Index: libgupc/smp/upc_sup.h
===================================================================
--- libgupc/smp/upc_sup.h	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_sup.h	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,116 @@
+/* Copyright (C) 2006-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+
+#ifndef _UPC_SUP_H_
+#define _UPC_SUP_H_
+
+/* Internal runtime routines and external symbols.  */
+
+//begin lib_runtime_api
+
+extern void *__cvtaddr (upc_shared_ptr_t);
+extern void *__getaddr (upc_shared_ptr_t);
+extern void __upc_barrier (int barrier_id);
+extern void __upc_notify (int barrier_id);
+extern void __upc_wait (int barrier_id);
+extern void __upc_exit (int status)
+      __attribute__ ((__nothrow__))
+      __attribute__ ((__noreturn__));
+extern void __upc_fatal (const char *fmt, ...)
+      __attribute__ ((__format__ (__printf__, 1, 2)))
+      __attribute__ ((__nothrow__))
+      __attribute__ ((__noreturn__));
+
+/* Profiled versions of runtime routines.  */
+extern void *__cvtaddrg (upc_shared_ptr_t, const char *filename, const int linenum);
+extern void *__getaddrg (upc_shared_ptr_t, const char *filename, const int linenum);
+extern void __upc_barrierg (int barrier_id, const char *filename, const int linenum);
+extern void __upc_notifyg (int barrier_id, const char *filename, const int linenum);
+extern void __upc_waitg (int barrier_id, const char *filename, const int linenum);
+extern void __upc_exitg (int status, const char *filename, const int linenum)
+                        __attribute__ ((__noreturn__));
+extern void __upc_funcg (int start, const char *funcname,
+                         const char *filename, const int linenum);
+extern void __upc_forallg (int start, const char *filename, const int linenum);
+//end lib_runtime_api
+
+//begin lib_heap_api
+
+extern void __upc_acquire_alloc_lock (void);
+extern void __upc_release_alloc_lock (void);
+//end lib_heap_api
+
+//begin lib_vm_api
+
+extern void *__upc_vm_map_addr (upc_shared_ptr_t);
+extern int __upc_vm_alloc (upc_page_num_t);
+extern upc_page_num_t __upc_vm_get_cur_page_alloc (void);
+//end lib_vm_api
+
+extern void __upc_heap_init (upc_shared_ptr_t, size_t);
+extern int __upc_start (int argc, char *argv[]);
+extern void __upc_validate_pgm_info (char *);
+extern void __upc_vm_init_per_thread (void);
+extern void __upc_vm_init (upc_page_num_t);
+extern void __upc_barrier_init (void);
+
+//begin lib_sptr_to_addr
+
+/* Convert shared pointer P to an address.  To speed things up, the
+   last two unique (page, thread) lookups are cached.  Caller must
+   validate 'p' (check for NULL, etc.) before calling this routine. */
+__attribute__((__always_inline__))
+static inline
+void *
+__upc_sptr_to_addr (upc_shared_ptr_t p)
+{
+  extern GUPCR_THREAD_LOCAL unsigned long __upc_page1_ref, __upc_page2_ref;
+  extern GUPCR_THREAD_LOCAL void *__upc_page1_base, *__upc_page2_base;
+  void *addr;
+  size_t offset, p_offset;
+  upc_page_num_t pn;
+  unsigned long this_page;
+  offset = GUPCR_PTS_OFFSET (p);
+  p_offset = offset & GUPCR_VM_OFFSET_MASK;  /* Offset within the page.  */
+  pn = (offset >> GUPCR_VM_OFFSET_BITS) & GUPCR_VM_PAGE_MASK;
+  this_page = (pn << GUPCR_THREAD_SIZE) | GUPCR_PTS_THREAD (p);  /* Key.  */
+  if (this_page == __upc_page1_ref)
+    addr = (char *) __upc_page1_base + p_offset;
+  else if (this_page == __upc_page2_ref)
+    addr = (char *) __upc_page2_base + p_offset;
+  else
+    addr = __upc_vm_map_addr (p);  /* Key matches neither cached ref.  */
+  return addr;
+}
+
+#ifdef __UPC__
+  #define __upc_map_to_local(P)(__upc_sptr_to_addr(*(upc_shared_ptr_t *)&(P)))
+#endif
+
+//end lib_sptr_to_addr
+
+#endif /* _UPC_SUP_H_ */
Index: libgupc/smp/upc_sync.h
===================================================================
--- libgupc/smp/upc_sync.h	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_sync.h	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,157 @@
+/* Copyright (C) 2004-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+
+#ifndef _UPC_SYNC_H_
+#define _UPC_SYNC_H_
+
+//begin lib_fence_defs
+
+/*
+
+The following table (up to PA-RISC) is excerpted from
+"Implementing the UPC memory consistency model for
+shared-memory architectures", Dan Bonachea et al.
+
+CPU		Write fence		Read fence
+--------------------------------------------------
+Power/PowerPC	lwsync			isync
+Alpha		wmb			mb
+x86		lock; addl $0,0(%%esp)  none reqd.
+Athlon/Opteron	mfence			none reqd.
+Itanium		mf			none reqd.
+SPARC		stbar			none reqd.
+MIPS		sync			none reqd.
+PA-RISC		SYNC			none reqd.
+--
+AARCH64         dmb ishst               dmb ishld
+
+*/
+
+#define GUPCR_FENCE() { GUPCR_READ_FENCE (); GUPCR_WRITE_FENCE (); }
+
+#if defined (PPC) || defined (__PPC__)
+#define GUPCR_WRITE_FENCE() asm __volatile__ ("lwsync":::"memory")
+#define GUPCR_READ_FENCE() asm __volatile__ ("isync":::"memory")
+#elif defined (alpha)
+#define GUPCR_WRITE_FENCE() asm __volatile__ ("wmb":::"memory")
+#define GUPCR_READ_FENCE() asm __volatile__ ("mb":::"memory")
+#elif defined (__x86_64__)
+#define GUPCR_WRITE_FENCE() asm __volatile__ ("mfence":::"memory")
+#define GUPCR_READ_FENCE() asm __volatile__ ("":::"memory")
+#elif defined (__ia64__)
+#define GUPCR_WRITE_FENCE() asm __volatile__ ("mf":::"memory")
+#define GUPCR_READ_FENCE() asm __volatile__ ("":::"memory")
+#elif defined (i386)
+#define GUPCR_WRITE_FENCE() asm __volatile__ ("lock; addl $0,0(%%esp)":::"memory")
+#define GUPCR_READ_FENCE() asm __volatile__ ("":::"memory")
+#elif defined (sparc)
+#define GUPCR_WRITE_FENCE() asm __volatile__ ("stbar":::"memory")
+#define GUPCR_READ_FENCE() asm __volatile__ ("":::"memory")
+#elif defined (mips)
+#define GUPCR_WRITE_FENCE() asm __volatile__ ("sync":::"memory")
+#define GUPCR_READ_FENCE() asm __volatile__ ("":::"memory")
+#elif defined (hppa)
+#define GUPCR_WRITE_FENCE() asm __volatile__ ("SYNC":::"memory")
+#define GUPCR_READ_FENCE() asm __volatile__ ("":::"memory")
+#elif defined (__aarch64__)
+#define GUPCR_WRITE_FENCE() asm __volatile__ ("dmb ishst":::"memory")
+#define GUPCR_READ_FENCE() asm __volatile__ ("dmb ishld":::"memory")
+#else
+# error "No memory fence  operations provided for this cpu."
+#endif
+//end lib_fence_defs
+
+//begin lib_atomic
+#if defined (__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) \
+    || defined (__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4)
+  /* Use GCC's builtin implementation, if available.  */
+  #define __upc_atomic_cas(PTR, OLD_VAL, NEW_VAL) \
+    __sync_bool_compare_and_swap (PTR, OLD_VAL, NEW_VAL)
+#else
+  extern int __upc_atomic_cas (os_atomic_p, os_atomic_t, os_atomic_t);
+#endif
+
+#if defined (HAVE_SYNC_FETCH_AND_ADD_8) \
+    || defined (HAVE_SYNC_FETCH_AND_ADD_4)
+#define __upc_sync_fetch_and_add(PTR, INC) \
+    __sync_fetch_and_add (PTR, INC)
+#else
+__attribute__ ((__always_inline__))
+static inline  /* Fallback used when the __sync_fetch_and_add macro is not.  */
+int
+__upc_sync_fetch_and_add (int *addr, int inc)
+{
+  int old_val, new_val;
+  do  /* Retry until __upc_atomic_cas reports that it stored NEW_VAL.  */
+    {
+      old_val = *addr;
+      new_val = old_val + inc;
+    }
+  while (!__upc_atomic_cas (addr, old_val, new_val));
+  return old_val;  /* The value read from *ADDR before the addition.  */
+}
+#endif
+//end lib_atomic
+
+//begin lib_spin_until
+
+/* Give up control of the cpu for a small time interval. */
+#ifdef __sgi__
+#define __upc_yield_cpu() do { sginap(0); } while (0)
+#else
+# ifdef _POSIX_PRIORITY_SCHEDULING
+# define __upc_yield_cpu() do { sched_yield(); } while (0)
+# else
+# define __upc_yield_cpu() do { usleep(1000L); } while (0)
+# endif
+#endif
+
+/* Number of cpu's available */
+extern int __upc_num_cpus;
+
+/* Max. number of iterations to poll waiting for a
+ * spinlock loop condition to be satisfied.
+ */
+#define OS_MAX_SPIN_COUNT (__upc_num_cpus > 1 ? 500 : 0)
+/* Keep spinning until PREDICATE is true (a macro, so that PREDICATE is
+ * re-evaluated on each iteration).  The counter has a private name so
+ * that it cannot shadow a variable named 'i' used inside PREDICATE. */
+#define __upc_spin_until(PREDICATE) \
+    { \
+      int __upc_spin_count = 0; \
+      while (!(PREDICATE)) \
+	{ \
+	  if (++__upc_spin_count >= OS_MAX_SPIN_COUNT) \
+	    { \
+	      __upc_yield_cpu (); \
+	      __upc_spin_count = 0; \
+	    } \
+	} \
+    }
+//end lib_spin_until
+
+#endif /* _UPC_SYNC_H_ */
Index: libgupc/smp/upc_sysdep.c
===================================================================
--- libgupc/smp/upc_sysdep.c	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_sysdep.c	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,319 @@
+/* Copyright (C) 2003-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime Library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+   This library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2, or (at
+   your option) any later version.
+
+   This library is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this library; see the file COPYING.  If not, write to
+   the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+   MA 02111-1307, USA.
+
+   As a special exception, if you link this library with files
+   compiled with a GNU compiler to produce an executable, this does
+   not cause the resulting executable to be covered by the GNU General
+   Public License.  This exception does not however invalidate any
+   other reasons why the executable file might be covered by the GNU
+   General Public License.  */
+
+#include "upc_config.h"
+#include "upc_sysdep.h"
+#include "upc_defs.h"
+#include "upc_sup.h"
+#include "upc_sync.h"
+
+#ifdef __sgi__
+#ifndef _SC_NPROCESSORS_ONLN
+#define _SC_NPROCESSORS_ONLN _SC_NPROC_ONLN
+#endif
+#endif
+
+int __upc_num_cpus;
+
+/* One-time process setup: record the number of online CPUs in
+   __upc_num_cpus and make this process its own process group
+   leader.  Exits with status 2 if setpgid fails.  */
+void
+__upc_sys_init (void)
+{
+  __upc_num_cpus = (int) sysconf (_SC_NPROCESSORS_ONLN);
+
+  /* Make sure that this main process is the process group leader */
+  if (getpgrp() != getpid() && setpgid(0, 0) == -1)
+    { perror("setpgid"); exit (2); }
+}
+
+/* Return a description of signal SIG.  Signals without a table entry
+   are described in a static buffer that later calls overwrite.  */
+char *__upc_strsignal (int sig)
+{
+  static char sigbuf[64];
+#if defined(__sgi__) || defined(__sun__)
+  char **sys_siglist = _sys_siglist;
+  const int nsig = _sys_nsig;
+#else
+#ifndef __NetBSD__ // signal.h has pointer decl instead of array
+  extern const char * const sys_siglist[];
+#endif
+  const int nsig = NSIG;
+#endif
+  if (sig > 0 && sig < nsig && sys_siglist[sig])
+    return (char *)sys_siglist[sig];
+  return (sprintf (sigbuf, "signal number %d", sig), sigbuf);
+}
+
+#ifndef __upc_atomic_cas
+/* If a builtin implementation of __upc_atomic_cas was found,
+   then the symbol will be defined as a pre-processor macro.
+   Otherwise, implement the function out-of-line.  */
+#ifdef __sgi__
+int
+__upc_atomic_cas (os_atomic_p ptr, os_atomic_t old, os_atomic_t new)
+{
+  upc_info_p u = __upc_info;
+  return uscas ((void *)ptr, old, new, (usptr_t *)u->runtime_heap);
+}
+#elif __i386__
+
+#define LOCK_PREFIX "lock ; "
+
+int
+__upc_atomic_cas (os_atomic_p ptr, os_atomic_t old, os_atomic_t new)
+{
+  os_atomic_t prev;
+  __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2"
+		       : "=a"(prev)
+		       : "q"(new), "m"(*ptr), "0"(old)
+		       : "memory");
+  return prev == old;
+}
+#elif __x86_64__
+
+#define LOCK_PREFIX "lock ; "
+
+int
+__upc_atomic_cas (os_atomic_p ptr, os_atomic_t old, os_atomic_t new)
+{
+  os_atomic_t prev;
+  __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2"
+		       : "=a"(prev)
+		       : "q"(new), "m"(*ptr), "0"(old)
+		       : "memory");
+  return prev == old;
+}
+
+#elif __ia64__
+
+#include <linux/types.h>
+#include <asm/intrinsics.h>
+
+int
+__upc_atomic_cas (os_atomic_p ptr, os_atomic_t old, os_atomic_t new)
+{
+  os_atomic_t prev;
+  prev = cmpxchg (ptr, old, new);
+  return prev == old;
+}
+#else
+  #error "__upc_atomic_cas not implemented on this target"
+#endif
+#endif /* ! __upc_atomic_cas */
+
+int
+__upc_atomic_get_bit (os_atomic_p bits, int bitnum)
+{
+  /* Shift an unsigned 1: '1 << 31' overflows a signed int (undefined).  */
+  os_atomic_t bit = (os_atomic_t) (1UL << (bitnum % OS_BITS_PER_ATOMIC_WORD));
+  os_atomic_t word = bits[bitnum / OS_BITS_PER_ATOMIC_WORD];
+  GUPCR_READ_FENCE();
+  return (word & bit) != 0;
+}
+
+void
+__upc_atomic_set_bit (os_atomic_p bits, int bitnum)
+{
+  os_atomic_t *word = bits + (bitnum / OS_BITS_PER_ATOMIC_WORD);
+  /* Shift an unsigned 1: '1 << 31' overflows a signed int (undefined).  */
+  os_atomic_t bit = (os_atomic_t) (1UL << (bitnum % OS_BITS_PER_ATOMIC_WORD));
+  os_atomic_t old_val;
+  do
+    {
+      old_val = *word;
+    }
+  while (!__upc_atomic_cas (word, old_val, old_val | bit));
+}
+
+os_heap_p
+__upc_create_runtime_heap (size_t ARG_UNUSED (max_size),
+                           const char **ARG_UNUSED (err_msg))
+{
+  os_heap_p heap;
+#ifdef __sgi__
+  /* Create the shared arena. */
+  (void) usconfig (CONF_INITSIZE, max_size);
+  heap = (os_heap_p) usinit (DEV_ZERO);
+  if (!heap)
+    { *err_msg = strerror(errno); return 0; }
+#else
+  /* On platforms other than SGI/Irix we don't
+     need a heap, because mmap() will work well
+     for the limited number of data structures
+     allocated during runtime initialization.  */
+  heap = (void *)-1;
+#endif
+  return heap;
+}
+
+void *
+__upc_runtime_alloc (size_t size, os_heap_p *ARG_UNUSED (heap),
+                     const char **err_msg)
+{
+  void *alloc;
+#ifdef __sgi__
+  alloc = (void *) usmalloc (size, (usptr_t *)*heap);
+  if (!alloc)
+    { *err_msg = strerror(errno); return 0; }
+#else
+  alloc = mmap ((void *) 0, size,
+	        PROT_READ | PROT_WRITE,
+	        MAP_SHARED | MAP_ANONYMOUS, -1, OFFSET_ZERO);
+  if (!alloc || alloc == MAP_ERROR)
+    { *err_msg = strerror(errno); return 0; }
+#endif
+  return alloc;
+}
+
+/* Set up LOCK: a new arena lock on SGI/Irix, else 0 (unlocked).  */
+void
+__upc_init_lock (os_lock_p lock)
+{
+#ifdef __sgi__
+  {
+    upc_info_p u = __upc_info;
+    if (!u)
+      __upc_fatal ("UPC runtime not initialized");
+    *lock = (os_lock_t) usnewlock ((usptr_t *) u->runtime_heap);
+    if (!*lock)
+      { perror ("__upc_init_lock"); abort (); }
+  }
+#else
+  *lock = 0;
+#endif
+}
+
+/* Acquire LOCK, waiting as long as needed; GUPCR_FENCE follows.  */
+void
+__upc_acquire_lock (os_lock_p lock)
+{
+  if (!lock)
+    __upc_fatal ("NULL shared pointer passed to UPC lock operation");
+#ifdef __sgi__
+  {
+    int status;
+    status = ussetlock(*lock);
+    if (status == 0)
+      __upc_fatal ("upc_lock() could not acquire lock");
+    else if (status < 0)
+      { perror ("upc_acquire_lock"); abort (); }
+  }
+#else
+  __upc_spin_until (__upc_atomic_cas ((os_atomic_p) lock, 0, 1));
+#endif
+  GUPCR_FENCE();
+}
+
+/* Try to acquire LOCK without waiting; return non-zero on success.  */
+int
+__upc_try_acquire_lock (os_lock_p lock)
+{
+  int status;
+  if (!lock)
+    __upc_fatal ("NULL shared pointer passed to UPC lock operation");
+#ifdef __sgi__
+  status = uscsetlock(*lock, 0);
+  if (status < 0)
+    { perror ("upc_try_acquire_lock"); abort (); }
+#else
+  status = __upc_atomic_cas ((os_atomic_p) lock, 0, 1);
+#endif
+  if (status)
+    GUPCR_FENCE();
+  return status;
+}
+
+/* Release LOCK; GUPCR_FENCE is issued before the releasing store.  */
+void
+__upc_release_lock (os_lock_p lock)
+{
+  if (!lock)
+    __upc_fatal ("NULL shared pointer passed to UPC lock operation");
+  GUPCR_FENCE();
+#ifdef __sgi__
+  {
+    int status;
+    status = usunsetlock(*lock);
+    if (status > 0)
+      __upc_fatal ("upc_unlock() could not release lock");
+    else if (status < 0)
+      { perror ("upc_unlock"); abort (); }
+  }
+#else
+  *((os_atomic_p) lock) = 0;
+#endif
+}
+
+/* Given a "tag" (a relative filename ending in XXXXXX),
+   create a temporary file using the tag.
+   Return a file descriptor associated with the newly
+   created temporary file.
+   [see: http://www.linux.com/howtos/Secure-Programs-HOWTO/avoid-race.shtml]  */
+
+int
+__upc_create_temp_file (const char *tag, char *tmp_fname,
+                        const char **err_msg)
+{
+  const char *tmpdir = NULL;
+  mode_t old_mode;
+  int fd;
+  if ((getuid () == geteuid ()) && (getgid () == getegid ()))
+    {
+      tmpdir = getenv ("TMPDIR");
+      if (!tmpdir)
+        tmpdir = getenv ("TMP");
+    }
+  if (!tmpdir)
+    tmpdir = "/tmp";
+  sprintf (tmp_fname, "%s/%s", tmpdir, tag);
+  /* Create file with restrictive permissions */
+  old_mode = umask (077);
+  fd = mkstemp (tmp_fname);
+  (void) umask (old_mode);
+  if (fd < 0)
+    *err_msg = "Couldn't open temporary file";
+  return fd;
+}
+
+/* Create a file that will be used as the backing store
+   for the UPC program's global shared memory. Return
+   a file descriptor that can subsequently be used
+   to mmap() the file.  If an error is encountered,
+   fd will be set to -1, and err_msg will contain
+   a descriptive error message.  */
+int
+__upc_create_global_mem_file (char *tmp_fname, const char **err_msg)
+{
+  int fd;
+  char fname_template[30];
+  sprintf (fname_template, "upc_shmem.%d.XXXXXX", (int)getpid());
+  fd = __upc_create_temp_file (fname_template, tmp_fname, err_msg);
+  return fd;
+}
Index: libgupc/smp/upc_sysdep.h
===================================================================
--- libgupc/smp/upc_sysdep.h	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_sysdep.h	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,80 @@
+/* GNU UPC Runtime Operating System and Target Platform Dependent Support
+   Copyright (C) 2003-2015 Free Software Foundation, Inc.
+   Free Software Foundation, Inc. 
+   This file is part of the UPC runtime Library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+   This library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2, or (at
+   your option) any later version.
+
+   This library is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this library; see the file COPYING.  If not, write to
+   the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
+   MA 02111-1307, USA.
+
+   As a special exception, if you link this library with files
+   compiled with a GNU compiler to produce an executable, this does
+   not cause the resulting executable to be covered by the GNU General
+   Public License.  This exception does not however invalidate any
+   other reasons why the executable file might be covered by the GNU
+   General Public License.  */
+
+#ifndef _UPC_OS_H_
+#define _UPC_OS_H_
+
+
+/* An actual heap is required only for the SGI Irix
+   based systems, because spin lock related data
+   structures must live there.  The runtime doesn't
+   otherwise need its own heap, so a null handle is
+   passed around.  */
+#ifdef __sgi__
+typedef usptr_t os_heap_t;
+#else
+typedef void *os_heap_t;
+#endif
+typedef os_heap_t *os_heap_p;
+
+#ifdef __sgi__
+typedef volatile ptrdiff_t os_atomic_t;
+#else
+typedef volatile int os_atomic_t;
+#endif
+typedef os_atomic_t *os_atomic_p;
+
+#define OS_ATOMIC_WORD_SIZE (sizeof(os_atomic_t))
+#define OS_BITS_PER_ATOMIC_WORD (OS_ATOMIC_WORD_SIZE * 8)
+
+#ifdef __sgi__
+typedef ulock_t os_lock_t;
+#else
+typedef os_atomic_t os_lock_t;
+#endif
+typedef os_lock_t *os_lock_p;
+
+extern void __upc_sys_init (void);
+
+extern int __upc_atomic_get_bit (os_atomic_p, int);
+extern void __upc_atomic_set_bit (os_atomic_p, int);
+
+extern void __upc_init_lock (os_lock_p);
+extern void __upc_acquire_lock (os_lock_p);
+extern int __upc_try_acquire_lock (os_lock_p);
+extern void __upc_release_lock (os_lock_p);
+
+extern os_heap_p __upc_create_runtime_heap (size_t, const char **);
+extern void *__upc_runtime_alloc (size_t, os_heap_p *, const char **);
+extern int __upc_create_temp_file (const char *tag, char *tmp_fname, 
+				   const char **err_msg);
+extern int __upc_create_global_mem_file (char *tmp_fname, const char **err_msg);
+extern char *__upc_strsignal (int);
+
+#endif /* !_UPC_OS_H_ */
Index: libgupc/smp/upc_tick.c
===================================================================
--- libgupc/smp/upc_tick.c	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_tick.c	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,78 @@
+/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime Library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#include "upc_config.h"
+#include "upc_sysdep.h"
+#include "upc_defs.h"
+#include "upc_lib.h"
+
+#if HAVE_CLOCK_GETTIME
+#ifdef CLOCK_MONOTONIC_RAW
+/* System clock id passed to clock_gettime. CLOCK_MONOTONIC_RAW
+   is preferred.  It has been available in the Linux kernel
+   since version 2.6.28 */
+#define SYS_RT_CLOCK_ID CLOCK_MONOTONIC_RAW
+#else
+#define SYS_RT_CLOCK_ID CLOCK_MONOTONIC
+#endif
+
+upc_tick_t
+upc_ticks_now (void)
+{
+  struct timespec ts;
+  upc_tick_t t;
+  if (clock_gettime (SYS_RT_CLOCK_ID, &ts) != 0)
+    {
+      perror ("clock_gettime");
+      abort ();
+    }
+  t = (upc_tick_t) ts.tv_sec * 1000000000LL + (upc_tick_t) ts.tv_nsec;
+  return t;
+}
+
+#else /* !HAVE_CLOCK_GETTIME */
+
+upc_tick_t
+upc_ticks_now (void)
+{
+  struct timeval tv;
+  upc_tick_t t;
+  if (gettimeofday (&tv, NULL) != 0)
+    {
+      perror ("gettimeofday");
+      abort ();
+    }
+  t = (upc_tick_t) tv.tv_sec * 1000000000LL + (upc_tick_t) tv.tv_usec * 1000;
+  return t;
+}
+
+#endif
+
+uint64_t
+upc_ticks_to_ns (upc_tick_t ticks)
+{
+  return ticks;
+}
Index: libgupc/smp/upc_vm.c
===================================================================
--- libgupc/smp/upc_vm.c	(.../trunk)	(revision 0)
+++ libgupc/smp/upc_vm.c	(.../branches/gupc)	(revision 231080)
@@ -0,0 +1,363 @@
+/* Copyright (C) 2006-2015 Free Software Foundation, Inc.
+   This file is part of the UPC runtime library.
+   Written by Gary Funck <gary@intrepid.com>
+   and Nenad Vukicevic <nenad@intrepid.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+
+#include "upc_config.h"
+#include "upc_sysdep.h"
+#include "upc_defs.h"
+#include "upc_sup.h"
+#include "upc_sync.h"
+#include "upc_numa.h"
+
+/* There is a local page table for each thread. The
+   local page table maps a local page to the location
+   where it has been mapped into the thread's memory.  */
+typedef void *upc_lpte_t;
+typedef upc_lpte_t *upc_lpte_p;
+GUPCR_THREAD_LOCAL upc_lpte_p __upc_lpt;
+
+/* To speed things up, the last two unique (page, thread)
+   lookups are cached.  See __upc_sptr_to_addr() in upc_sup.h.
+   NOTE: for this to work correctly, the associative set size
+   (GUPCR_VM_GLOGAl_MAP_SET_SIZE) must be >= 2; otherwise a
+   cached mapped entry might be evicted (unmapped).  */
+GUPCR_THREAD_LOCAL unsigned long __upc_page1_ref, __upc_page2_ref;
+GUPCR_THREAD_LOCAL void *__upc_page1_base, *__upc_page2_base;
+
+/* Each thread maintains a series of mapped regions
+   of memory that are mapped to specific global pages.
+   The Global Map Table (gmt) is indexed by a hashed
+   global page number to select a row (associative set)
+   of entries that are searched to find a per thread
+   mapping to the global page.  All pages that do not
+   have affinity with the referencing thread are
+   considered to be global.  */
+typedef struct upc_gme_struct
+  {
+    upc_page_num_t global_page_num;
+    void *local_page;
+  } upc_gme_t;
+typedef upc_gme_t *upc_gme_p;
+typedef upc_gme_t upc_gme_set_t[GUPCR_VM_GLOGAl_MAP_SET_SIZE];
+typedef upc_gme_set_t *upc_gme_set_p;
+typedef upc_gme_set_t upc_global_map_t[GUPCR_VM_GLOBAL_MAP_SIZE];
+typedef upc_global_map_t *upc_global_map_p;
+static GUPCR_THREAD_LOCAL upc_global_map_p __upc_gmt;
+
+/* Record the current value of the number of pages allocated.
+   This value is updated to the global value in the UPC info.
+   structure whenever an attempt is made to access a page
+   whose page number is not less than this current value.  */
+GUPCR_THREAD_LOCAL upc_page_num_t __upc_cur_page_alloc;
+
+/* If this thread's idea of how many pages have been allocated
+   per thread is less than the actual value stored in the
+   UPC information structure, map the additional pages allocated
+   for this thread into the local page table.  Mappings
+   in the local page table are never unmapped; this ensures that
+   conversions from pointer to shared to local pointers work
+   as expected.  */
+
+upc_page_num_t
+__upc_vm_get_cur_page_alloc ()
+{
+  const upc_info_p u = __upc_info;
+  const upc_page_num_t old_page_alloc = __upc_cur_page_alloc;
+  upc_page_num_t alloc_pages, p, pt;
+  upc_page_num_t  i, j;
+  if (!u)
+    __upc_fatal ("UPC runtime not initialized");
+  __upc_acquire_lock (&u->lock);
+  /* get the latest value */
+  GUPCR_FENCE ();
+  __upc_cur_page_alloc = u->cur_page_alloc;
+  GUPCR_READ_FENCE ();
+  __upc_release_lock (&u->lock);
+  alloc_pages = __upc_cur_page_alloc - old_page_alloc;
+  if (alloc_pages)
+    {
+      /* Additional pages have been allocated since we last checked.
+         Update the local page table to point to the pages
+	 newly allocated to this thread.  We need to map contiguous regions
+	 in a single mmap call so that conversions to local pointers
+	 work properly.  */
+      upc_page_num_t first_page = old_page_alloc;
+      upc_page_num_t next_global_page = u->gpt[first_page * THREADS + MYTHREAD];
+      upc_page_num_t  region_size = 0;
+      for (i = 0; i < alloc_pages; ++i)
+	{
+	  const upc_page_num_t this_global_page = next_global_page;
+          region_size += 1;
+	  if (i < (alloc_pages - 1))
+	    {
+	       /* If not at the last page, then calculate the
+		  next global page number, so that it can be checked
+		  against the current global page number for contiguity.  */
+	       p = old_page_alloc + (i + 1);
+	       pt = p * THREADS + MYTHREAD;
+	       next_global_page = u->gpt[pt];
+	    }
+	  if ((i == (alloc_pages - 1))
+	      || (next_global_page != (this_global_page + 1)))
+	    {
+	      /* End of region detected.  Map the current region
+	         into the current thread's address space and update
+		 the Local Page Table.  */
+	      const upc_page_num_t gpn = u->gpt[first_page * THREADS + MYTHREAD];
+	      const off_t global_mem_offset = (off_t)gpn * GUPCR_VM_PAGE_SIZE;
+	      const void *region_base = mmap ((void *) 0,
+	                             region_size * GUPCR_VM_PAGE_SIZE,
+				     PROT_READ | PROT_WRITE, MAP_SHARED,
+				     u->smem_fd, global_mem_offset);
+	      if (region_base == MAP_ERROR)
+	        {
+		  perror ("UPC runtime error: can't map local region");
+		  abort ();
+	        }
+              /* set affinity to this region */
+	      __upc_numa_memory_region_affinity_set (u, MYTHREAD, region_base,
+                                                       region_size * GUPCR_VM_PAGE_SIZE);
+	      /* Update the local page table */
+	      for (j = 0; j < region_size; ++j)
+	        {
+		  p = first_page + j;
+		  __upc_lpt[p] = (void *)((size_t)region_base + j * GUPCR_VM_PAGE_SIZE);
+		}
+	      first_page += region_size;
+	      region_size = 0;
+	    }
+	}
+    }
+  return __upc_cur_page_alloc;
+}
+
+/* For pages in threads other than the current thread,
+   check the Global Map Table (gmt) to see if the page
+   has already been mapped into this thread's address
+   space.  The lookup is implemented by first converting
+   (thread, page) into its corresponding global page number.
+   The global page number is then converted into a hash
+   index by adding its higher order bits into the low order
+   bits and then masking modulo the GMT size.  The hash value
+   selects a series of entries which are organized into an
+   N-way associative set, sorted in MRU order.  If no matching
+   entry is found, then map the appropriate global page and
+   update the GMT.  */
+
+static void *
+__upc_vm_map_global_page (int t, upc_page_num_t p)
+{
+  const upc_info_p u = __upc_info;
+  const upc_page_num_t pt = p * THREADS + t;
+  const upc_page_num_t gpn = u->gpt[pt];
+  const upc_page_num_t hash_gpn = ((gpn >> GUPCR_VM_GLOBAL_MAP_BITS) + gpn)
+                                    & GUPCR_VM_GLOBAL_MAP_MASK;
+  const upc_gme_set_p s = &(*__upc_gmt)[hash_gpn];
+  upc_gme_p g;
+  upc_page_num_t this_gpn;
+  off_t global_offset;
+  void *page_base;
+  int i, j;
+  for (i = 0; i < GUPCR_VM_GLOGAl_MAP_SET_SIZE; ++i)
+    {
+      g = &(*s)[i];
+      this_gpn = g->global_page_num;
+      if (gpn == this_gpn)
+        {
+	  /* Found a matching entry. */
+	  page_base = g->local_page;
+	  if (i > 0)
+	    {
+	      const upc_gme_t m = *g;
+	      /* Move this match to the front to preserve MRU order.  */
+	      for (j = i; j > 0; --j) (*s)[j] = (*s)[j - 1];
+	      (*s)[0] = m;
+	    }
+	  return page_base;
+	}
+      /* If this entry is empty, then no match.  Exit early
+         with 'i' pointing to this entry.  */
+      if (this_gpn == GUPCR_VM_PAGE_INVALID)
+        break;
+    }
+  if (i == GUPCR_VM_GLOGAl_MAP_SET_SIZE)
+    {
+      /* The set is full.  Unmap the last entry. */
+      g = &(*s)[GUPCR_VM_GLOGAl_MAP_SET_SIZE - 1];
+      page_base = g->local_page;
+      if (munmap (page_base, GUPCR_VM_PAGE_SIZE))
+        { perror ("UPC runtime error: global unmap"); abort (); }
+      /* Decrement 'i' so that it points to the last entry. */
+      i = i - 1;
+    }
+  /* Shift entries in associative set; make room at the front. */
+  for (j = i; j > 0; --j) (*s)[j] = (*s)[j - 1];
+  /* Map the new entry. */
+  global_offset = (off_t)gpn << GUPCR_VM_OFFSET_BITS;
+  page_base = mmap ((void *) 0, GUPCR_VM_PAGE_SIZE, PROT_READ | PROT_WRITE,
+		      MAP_SHARED, u->smem_fd, global_offset);
+  if (page_base == MAP_ERROR)
+    { perror ("UPC runtime error: can't map global address"); abort (); }
+  /* Add the new entry at the front and return the mapped address.  */
+  g = &(*s)[0];
+  g->global_page_num = gpn;
+  g->local_page = page_base;
+  return page_base;
+}
+
+/* Initialize the VM system.  Create the Global Page Table
+   and initially allocate 'num_init_local_pages' per UPC thread.
+   Although the required physical storage is allocated, the initial
+   mapping is deferred until each thread initializes.  */
+
+void 
+__upc_vm_init (upc_page_num_t num_init_local_pages)
+{
+  if (!__upc_vm_alloc (num_init_local_pages))
+    { perror ("UPC runtime error: can't allocate global memory"); abort (); }
+}
+
+/* Per thread VM initialization.  Create the Local Page Table (lpt)
+   and the Global Map Table (gmt).  Then update the lpt to reflect
+   the initially allocated storage.  */
+
+void
+__upc_vm_init_per_thread ()
+{
+  int i, j;
+  __upc_lpt = (upc_lpte_p) calloc (GUPCR_VM_MAX_PAGES_PER_THREAD, sizeof (upc_lpte_t));
+  if (!__upc_lpt)
+    { perror ("UPC runtime error: can't allocate LPT"); abort (); }
+  __upc_gmt = (upc_global_map_p) malloc (sizeof (upc_global_map_t));
+  if (!__upc_gmt)
+    { perror ("UPC runtime error: can't allocate GMT"); abort (); }
+  /* All entries in the global map are initially empty */
+  for (i = 0; i < GUPCR_VM_GLOBAL_MAP_SIZE; ++i)
+    for (j = 0; j < GUPCR_VM_GLOGAl_MAP_SET_SIZE; ++j)
+      {
+        upc_gme_p g = &(*__upc_gmt)[i][j];
+	g->global_page_num = GUPCR_VM_PAGE_INVALID;
+        g->local_page = (void *)0;
+      }
+  /* Invalidate the page lookup cache keys */
+  __upc_page1_ref = GUPCR_VM_PAGE_INVALID;
+  __upc_page2_ref = GUPCR_VM_PAGE_INVALID;
+  /* Update Local Page Table to reflect initial allocation.  */
+  __upc_cur_page_alloc = 0;
+  (void) __upc_vm_get_cur_page_alloc ();
+}
+
+/* Expand the shared memory file to hold an additional 'alloc_pages'
+   per thread.  Update the 'cur_page_alloc' field in the UPC info.
+   block to reflect the size increase.  Return 1 on success, 0 if the
+   page limit would be exceeded or the file cannot be resized.  */
+int
+__upc_vm_alloc (upc_page_num_t alloc_pages)
+{
+  const upc_info_p u = __upc_info;
+  upc_page_num_t page_alloc, new_page_alloc, i;
+  off_t smem_size;
+  if (!u)
+    __upc_fatal ("UPC runtime not initialized");
+  __upc_acquire_lock (&u->lock);
+  GUPCR_FENCE ();
+  page_alloc = u->cur_page_alloc;
+  GUPCR_READ_FENCE ();
+  /* Grow from the shared count just read under the lock; this thread's
+     cached '__upc_cur_page_alloc' can lag behind other threads' growth.  */
+  new_page_alloc = page_alloc + alloc_pages;
+  if (new_page_alloc > GUPCR_VM_MAX_PAGES_PER_THREAD)
+    {
+      __upc_release_lock (&u->lock);
+      return 0;
+    }
+  /* Widen to off_t before multiplying so the byte size cannot wrap.  */
+  smem_size = ((off_t) new_page_alloc * THREADS) << GUPCR_VM_OFFSET_BITS;
+  if (ftruncate (u->smem_fd, smem_size))
+    {
+      __upc_release_lock (&u->lock);
+      return 0;
+    }
+  for (i = 0; i < alloc_pages; ++i)
+    {
+      int t;
+      for (t = 0; t < THREADS; ++t)
+        {
+	  upc_page_num_t pt = (page_alloc + i) * THREADS + t;
+	  u->gpt[pt] = (page_alloc * THREADS) + (alloc_pages * t) + i;
+	}
+    }
+  GUPCR_WRITE_FENCE ();
+  u->cur_page_alloc = new_page_alloc;
+  GUPCR_FENCE ();
+  __upc_release_lock (&u->lock);
+  return 1;
+}
+
+/* Convert a non-null shared pointer into an address mapped
+   in the current thread's address space.  */
+
+void *
+__upc_vm_map_addr (upc_shared_ptr_t p)
+{
+  size_t offset, p_offset;
+  upc_page_num_t pn;
+  int t;
+  void *page_base;
+  void *addr;
+  offset = GUPCR_PTS_OFFSET(p);
+  p_offset = (offset & GUPCR_VM_OFFSET_MASK);
+  pn = (offset >> GUPCR_VM_OFFSET_BITS) & GUPCR_VM_PAGE_MASK;
+  t = GUPCR_PTS_THREAD(p);
+  /* If the page number exceeds the current value maintained
+     by the referencing thread, update to the most current value,
+     and check again.  */
+  if (pn >= __upc_cur_page_alloc)
+    {
+      __upc_cur_page_alloc = __upc_vm_get_cur_page_alloc ();
+      if (pn >= __upc_cur_page_alloc)
+        __upc_fatal ("Virtual address in shared address is out of range");
+    }
+  if (t == MYTHREAD)
+    {
+      /* A local reference:
+         Refer to the Local Page Table to find the proper mapping.  */
+      page_base = __upc_lpt[pn];
+    }
+  else
+    {
+      /* A global reference to another thread's storage:
+         Refer to the cached map entries in the Global Map Table.  */
+      page_base = __upc_vm_map_global_page (t, pn);
+    }
+  /* Update the cached lookup entries. */
+  __upc_page2_ref = __upc_page1_ref;
+  __upc_page2_base = __upc_page1_base;
+  __upc_page1_ref = (pn << GUPCR_THREAD_SIZE) | t;
+  __upc_page1_base = page_base;
+  addr = (char *)page_base + p_offset;
+  return addr;
+}


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]