This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[gomp4] nvptx offloading linking (was: [WIP] OpenMP 4 NVPTX support)


Hi!

On Wed, 22 Apr 2015 17:08:26 +0200, Bernd Schmidt <bernds@codesourcery.com> wrote:
> On 04/21/2015 05:58 PM, Jakub Jelinek wrote:
> 
> > suggests that while it is nice that when building nvptx accel compiler
> > we build libgcc.a, libc.a, libm.a, libgfortran.a (and in the future hopefully libgomp.a),
> > nothing attempts to link those in :(.
> 
> I have that fixed; I expect I'll get around to posting this at some 
> point now that stage1 is open.

I have committed the following to gomp-4_0-branch in r223176.  We'll be
submitting this for trunk later on; some changes will need to be done, as
already discussed.

Note that this patch has some dependencies on a patch that I'll be
committing later, "Assorted OpenACC changes".  These include
GOACC_get_num_threads and GOACC_get_thread_num interface changes; I
didn't see the point in completely disentangling these changes now.

You'll also want to update your nvptx newlib sources.

The nvptx-tools and offload-nvptx-none GCC installations need to be in
the same prefix, so that the latter can find the nvptx-none assembler,
and doesn't resort to using the "as" binary found first in $PATH, which
likely will be the host system's, and only spewing out a cascade of error
messages when confronted with PTX assembly code.  If you've been using my
build scripts (trunk-offload-big.tar.bz2, trunk-offload-light.tar.bz2;
will upload fixed tarballs later) as posted on
<https://gcc.gnu.org/wiki/Offloading#How_to_try_offloading_enabled_GCC>,
you'll need to apply the following patch:

diff --git BUILD-gcc-offload-nvptx-none BUILD-gcc-offload-nvptx-none
index 664a781..1e815eb 100755
--- BUILD-gcc-offload-nvptx-none
+++ BUILD-gcc-offload-nvptx-none
@@ -11,5 +11,5 @@ if ! test -f .have-configure; then
   ln -vs "$T"/source-newlib/newlib "$T"/source-gcc/newlib &&
-  rm -f "$T"/install/nvptx-none/usr &&
-  mkdir -p "$T"/install/nvptx-none &&
-  ln -vs . "$T"/install/nvptx-none/usr &&
+  rm -f "$T"/install/offload-nvptx-none/nvptx-none/usr &&
+  mkdir -p "$T"/install/offload-nvptx-none/nvptx-none &&
+  ln -vs . "$T"/install/offload-nvptx-none/nvptx-none/usr &&
   target=$("$T"/source-gcc/config.guess) &&
@@ -32,4 +32,4 @@ if ! test -f .have-configure; then
     --with-sysroot=/nvptx-none \
-    --with-build-sysroot="$T"/install/nvptx-none \
-    --with-build-time-tools="$T"/install/nvptx-none/bin \
+    --with-build-sysroot="$T"/install/offload-nvptx-none/nvptx-none \
+    --with-build-time-tools="$T"/install/offload-nvptx-none/nvptx-none/bin \
     --disable-sjlj-exceptions \
diff --git BUILD-nvptx-tools BUILD-nvptx-tools
index b58715d..c093983 100755
--- BUILD-nvptx-tools
+++ BUILD-nvptx-tools
@@ -11,3 +11,3 @@ if ! test -f .have-configure; then
     --target=nvptx-none \
-    --prefix="$T"/install \
+    --prefix="$T"/install/offload-nvptx-none \
     --with-cuda-driver-include=$CUDA/targets/x86_64-linux/include \

commit c4e9c60e860e4bd9996df196bee54d52cda64038
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Wed May 13 20:05:52 2015 +0000

    nvptx offloading linking
    
    	gcc/
    	* config/nvptx/mkoffload.c (enum Kind, struct Token, enum Vis)
    	(struct Stmt): Remove.
    	(read_file, tokenize, write_token, write_tokens, alloc_stmt)
    	(alloc_comment, append_stmt, rev_stmts, write_stmt, write_stmts)
    	(parse_insn, parse_list_nosemi, parse_init, parse_file): Remove
    	functions and macros.
    	(decls, vars, fns): Remove variables.
    	(maybe_unlink): Use save_temps rather than debug to keep files.
    	(tool_cleanup): Unlink ptx_cfile_name and ptx_name.
    	(read_file): Accept a pointer to a length and store into it.
    	(process): Don't try to parse the input file, just write it out as a
    	string, but looking for maps.  Also write out the length.
    	(main): Don't use -S to compile ptx code.  Add -lgomp.  Add
    	COLLECT_MKOFFLOAD_OPTIONS.  Scan for -fopenacc and produce an empty
    	image if it is not set.  Scan for -save-temps.
    	* gcc.c (mkoffload_options): New static variable.
    	(display_help): Mention -Xoffload
    	(driver_handle_option): Handle it.
    	(add_mkoffload_option): New static function.
    	(set_collect_gcc_options): If offloading, set
    	COLLECT_MKOFFLOAD_OPTIONS.
    	* doc/invoke.texi (-Xoffload): Document.
    	* common.opt (Xoffload): New option.
    	* gcc.c (process_command): Use spec_machine rather than
    	spec_host_machine to build tooldir_prefix2.
    	gcc/fortran/
    	* gfortranspec.c (lang_specific_driver): Add -Xoffload options to
    	link -lm and -lgfortran.
    	libgcc/
    	* config.host (nvptx-*): For an offloading build, add libgomp.a
    	and libgomp.spec to extra_parts.
    	* config/nvptx/t-nvptx (gomp-acc_on_device.o, gomp-tids.o)
    	(gomp-atomic.o, libgomp.a, libgomp.spec): New rules.
    	(OBJS_libgomp): New variable.
    	* config/nvptx/gomp-acc_on_device.c: New file.
    	* config/nvptx/gomp-atomic.asm: Likewise.
    	* config/nvptx/gomp-tids.c: Likewise.
    	libgomp/
    	* oacc-ptx.h: Remove file.
    	* plugin/plugin-nvptx.c: Don't include it.
    	(link_ptx): Accept a length argument.  Don't add predefined bits of
    	PTX code.  Look for NUL characters as file boundaries in the input
    	and link the multiple PTX files.
    	(GOMP_OFFLOAD_load_image): Get the size of PTX code from the table
    	and pass it to link_ptx.
    	* testsuite/libgomp.fortran/fortran.exp (lang_link_flags): Add
    	"-Xoffload -lgfortran -Xoffload -lm".
    	* testsuite/libgomp.oacc-fortran/fortran.exp (lang_link_flags):
    	Likewise.
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@223176 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog.gomp                                 |  28 +
 gcc/common.opt                                     |   3 +
 gcc/config/nvptx/mkoffload.c                       | 766 +++------------------
 gcc/doc/invoke.texi                                |   7 +-
 gcc/fortran/ChangeLog.gomp                         |   5 +
 gcc/fortran/gfortranspec.c                         |   2 +
 gcc/gcc.c                                          |  36 +-
 libgcc/ChangeLog.gomp                              |  12 +
 libgcc/config.host                                 |   6 +-
 libgcc/config/nvptx/gomp-acc_on_device.c           |   9 +
 libgcc/config/nvptx/gomp-atomic.asm                |  37 +
 libgcc/config/nvptx/gomp-tids.c                    |  66 ++
 libgcc/config/nvptx/t-nvptx                        |  13 +
 libgomp/ChangeLog.gomp                             |  16 +
 libgomp/oacc-ptx.h                                 | 454 ------------
 libgomp/plugin/plugin-nvptx.c                      |  91 +--
 libgomp/testsuite/libgomp.fortran/fortran.exp      |   5 +-
 libgomp/testsuite/libgomp.oacc-fortran/fortran.exp |   5 +-
 18 files changed, 383 insertions(+), 1178 deletions(-)

diff --git gcc/ChangeLog.gomp gcc/ChangeLog.gomp
index 4a46cdb..8ea9498 100644
--- gcc/ChangeLog.gomp
+++ gcc/ChangeLog.gomp
@@ -1,3 +1,31 @@
+2015-05-13  Bernd Schmidt  <bernds@codesourcery.com>
+
+	* config/nvptx/mkoffload.c (enum Kind, struct Token, enum Vis)
+	(struct Stmt): Remove.
+	(read_file, tokenize, write_token, write_tokens, alloc_stmt)
+	(alloc_comment, append_stmt, rev_stmts, write_stmt, write_stmts)
+	(parse_insn, parse_list_nosemi, parse_init, parse_file): Remove
+	functions and macros.
+	(decls, vars, fns): Remove variables.
+	(maybe_unlink): Use save_temps rather than debug to keep files.
+	(tool_cleanup): Unlink ptx_cfile_name and ptx_name.
+	(read_file): Accept a pointer to a length and store into it.
+	(process): Don't try to parse the input file, just write it out as a
+	string, but looking for maps.  Also write out the length.
+	(main): Don't use -S to compile ptx code.  Add -lgomp.  Add
+	COLLECT_MKOFFLOAD_OPTIONS.  Scan for -fopenacc and produce an empty
+	image if it is not set.  Scan for -save-temps.
+	* gcc.c (mkoffload_options): New static variable.
+	(display_help): Mention -Xoffload
+	(driver_handle_option): Handle it.
+	(add_mkoffload_option): New static function.
+	(set_collect_gcc_options): If offloading, set
+	COLLECT_MKOFFLOAD_OPTIONS.
+	* doc/invoke.texi (-Xoffload): Document.
+	* common.opt (Xoffload): New option.
+	* gcc.c (process_command): Use spec_machine rather than
+	spec_host_machine to build tooldir_prefix2.
+
 2015-05-11  Thomas Schwinge  <thomas@codesourcery.com>
 	    Bernd Schmidt  <bernds@codesourcery.com>
 	    Cesar Philippidis  <cesar@codesourcery.com>
diff --git gcc/common.opt gcc/common.opt
index 51833c1..cebbd01 100644
--- gcc/common.opt
+++ gcc/common.opt
@@ -741,6 +741,9 @@ Driver Separate
 Xlinker
 Driver Separate
 
+Xoffload
+Driver Separate
+
 Xpreprocessor
 Driver Separate
 
diff --git gcc/config/nvptx/mkoffload.c gcc/config/nvptx/mkoffload.c
index 8687154..b918cad 100644
--- gcc/config/nvptx/mkoffload.c
+++ gcc/config/nvptx/mkoffload.c
@@ -41,84 +41,12 @@ const char tool_name[] = "nvptx mkoffload";
 
 #define COMMENT_PREFIX "#"
 
-typedef enum Kind
-{
-  /* 0-ff used for single char tokens */
-  K_symbol = 0x100, /* a symbol */
-  K_label,  /* a label defn (i.e. symbol:) */
-  K_ident,  /* other ident */
-  K_dotted, /* dotted identifier */
-  K_number,
-  K_string,
-  K_comment
-} Kind;
-
-typedef struct Token
-{
-  unsigned short kind : 12;
-  unsigned short space : 1; /* preceded by space */
-  unsigned short end : 1;   /* succeeded by end of line */
-  /* Length of token */
-  unsigned short len;
-
-  /* Token itself */
-  char const *ptr;
-} Token;
-
-/* statement info */
-typedef enum Vis
-{
-  V_dot = 0,  /* random pseudo */
-  V_var = 1,  /* var decl/defn */
-  V_func = 2, /* func decl/defn */
-  V_insn = 3, /* random insn */
-  V_label = 4, /* label defn */
-  V_comment = 5,
-  V_pred = 6,  /* predicate */
-  V_mask = 0x7,
-  V_global = 0x08, /* globalize */
-  V_weak = 0x10,   /* weakly globalize */
-  V_no_eol = 0x20, /* no end of line */
-  V_prefix_comment = 0x40 /* prefixed comment */
-} Vis;
-
-typedef struct Stmt
-{
-  struct Stmt *next;
-  Token *tokens;
-  unsigned char vis;
-  unsigned len : 12;
-  unsigned sym : 12;
-} Stmt;
-
 struct id_map
 {
   id_map *next;
   char *ptx_name;
 };
 
-static const char *read_file (FILE *);
-static Token *tokenize (const char *);
-
-static void write_token (FILE *, const Token *);
-static void write_tokens (FILE *, const Token *, unsigned, int);
-
-static Stmt *alloc_stmt (unsigned, Token *, Token *, const Token *);
-#define alloc_comment(S,E) alloc_stmt (V_comment, S, E, 0)
-#define append_stmt(V, S) ((S)->next = *(V), *(V) = (S))
-static Stmt *rev_stmts (Stmt *);
-static void write_stmt (FILE *, const Stmt *);
-static void write_stmts (FILE *, const Stmt *);
-
-static Token *parse_insn (Token *);
-static Token *parse_list_nosemi (Token *);
-static Token *parse_init (Token *);
-static Token *parse_file (Token *);
-
-static Stmt *decls;
-static Stmt *vars;
-static Stmt *fns;
-
 static id_map *func_ids, **funcs_tail = &func_ids;
 static id_map *var_ids, **vars_tail = &var_ids;
 
@@ -136,7 +64,7 @@ bool target_ilp32 = false;
 void
 maybe_unlink (const char *file)
 {
-  if (! debug)
+  if (!save_temps)
     {
       if (unlink_if_ordinary (file)
 	  && errno != ENOENT)
@@ -149,6 +77,10 @@ maybe_unlink (const char *file)
 void
 tool_cleanup (bool)
 {
+  if (ptx_cfile_name)
+    maybe_unlink (ptx_cfile_name);
+  if (ptx_name)
+    maybe_unlink (ptx_name);
 }
 
 /* Add or change the value of an environment variable, outputting the
@@ -184,7 +116,7 @@ record_id (const char *p1, id_map ***where)
    remember, there could be a NUL in the file itself.  */
 
 static const char *
-read_file (FILE *stream)
+read_file (FILE *stream, size_t *plen)
 {
   size_t alloc = 16384;
   size_t base = 0;
@@ -214,557 +146,10 @@ read_file (FILE *stream)
 	}
     }
   buffer[base] = 0;
+  *plen = base;
   return buffer;
 }
 
-/* Read a token, advancing ptr.
-   If we read a comment, append it to the comments block. */
-
-static Token *
-tokenize (const char *ptr)
-{
-  unsigned alloc = 1000;
-  unsigned num = 0;
-  Token *toks = XNEWVEC (Token, alloc);
-  int in_comment = 0;
-  int not_comment = 0;
-
-  for (;; num++)
-    {
-      const char *base;
-      unsigned kind;
-      int ws = 0;
-      int eol = 0;
-
-    again:
-      base = ptr;
-      if (in_comment)
-	goto block_comment;
-      switch (kind = *ptr++)
-	{
-	default:
-	  break;
-
-	case '\n':
-	  eol = 1;
-	  /* Fall through */
-	case ' ':
-	case '\t':
-	case '\r':
-	case '\v':
-	  /* White space */
-	  ws = not_comment;
-	  goto again;
-
-	case '/':
-	  {
-	    if (*ptr == '/')
-	      {
-		/* line comment.  Do not include trailing \n */
-		base += 2;
-		for (; *ptr; ptr++)
-		  if (*ptr == '\n')
-		    break;
-		kind = K_comment;
-	      }
-	    else if (*ptr == '*')
-	      {
-		/* block comment */
-		base += 2;
-		ptr++;
-
-	      block_comment:
-		eol = in_comment;
-		in_comment = 1;
-		for (; *ptr; ptr++)
-		  {
-		    if (*ptr == '\n')
-		      {
-			ptr++;
-			break;
-		      }
-		    if (ptr[0] == '*' && ptr[1] == '/')
-		      {
-			in_comment = 2;
-			ptr += 2;
-			break;
-		      }
-		  }
-		kind = K_comment;
-	      }
-	    else
-	      break;
-	  }
-	  break;
-
-	case '"':
-	  /* quoted string */
-	  kind = K_string;
-	  while (*ptr)
-	    if (*ptr == '"')
-	      {
-		ptr++;
-		break;
-	      }
-	    else if (*ptr++ == '\\')
-	      ptr++;
-	  break;
-
-	case '.':
-	  if (*ptr < '0' || *ptr > '9')
-	    {
-	      kind = K_dotted;
-	      ws = not_comment;
-	      goto ident;
-	    }
-	  /* FALLTHROUGH */
-	case '0'...'9':
-	  kind = K_number;
-	  goto ident;
-	  break;
-
-	case '$':  /* local labels.  */
-	case '%':  /* register names, pseudoes etc */
-	  kind = K_ident;
-	  goto ident;
-
-	case 'a'...'z':
-	case 'A'...'Z':
-	case '_':
-	  kind = K_symbol; /* possible symbol name */
-	ident:
-	  for (; *ptr; ptr++)
-	    {
-	      if (*ptr >= 'A' && *ptr <= 'Z')
-		continue;
-	      if (*ptr >= 'a' && *ptr <= 'z')
-		continue;
-	      if (*ptr >= '0' && *ptr <= '9')
-		continue;
-	      if (*ptr == '_' || *ptr == '$')
-		continue;
-	      if (*ptr == '.' && kind != K_dotted)
-		/* Idents starting with a dot, cannot have internal dots. */
-		continue;
-	      if ((*ptr == '+' || *ptr == '-')
-		  && kind == K_number
-		  && (ptr[-1] == 'e' || ptr[-1] == 'E'
-		      || ptr[-1] == 'p' || ptr[-1] == 'P'))
-		/* exponent */
-		continue;
-	      break;
-	    }
-	  if (*ptr == ':')
-	    {
-	      ptr++;
-	      kind = K_label;
-	    }
-	  break;
-	}
-
-      if (alloc == num)
-	{
-	  alloc *= 2;
-	  toks = XRESIZEVEC (Token, toks, alloc);
-	}
-      Token *tok = toks + num;
-
-      tok->kind = kind;
-      tok->space = ws;
-      tok->end = 0;
-      tok->ptr = base;
-      tok->len = ptr - base - in_comment;
-      in_comment &= 1;
-      not_comment = kind != K_comment;
-      if (eol && num)
-	tok[-1].end = 1;
-      if (!kind)
-	break;
-    }
-
-  return toks;
-}
-
-/* Write an encoded token. */
-
-static void
-write_token (FILE *out, Token const *tok)
-{
-  if (tok->space)
-    fputc (' ', out);
-
-  switch (tok->kind)
-    {
-    case K_string:
-      {
-	const char *c = tok->ptr + 1;
-	size_t len = tok->len - 2;
-
-	fputs ("\\\"", out);
-	while (len)
-	  {
-	    const char *bs = (const char *)memchr (c, '\\', len);
-	    size_t l = bs ? bs - c : len;
-
-	    fprintf (out, "%.*s", (int)l, c);
-	    len -= l;
-	    c += l;
-	    if (bs)
-	      {
-		fputs ("\\\\", out);
-		len--, c++;
-	      }
-	  }
-	fputs ("\\\"", out);
-      }
-      break;
-
-    default:
-      /* All other tokens shouldn't have anything magic in them */
-      fprintf (out, "%.*s", tok->len, tok->ptr);
-      break;
-    }
-  if (tok->end)
-    fputs ("\\n", out);
-}
-
-static void
-write_tokens (FILE *out, Token const *toks, unsigned len, int spc)
-{
-  fputs ("\t\"", out);
-  for (; len--; toks++)
-    write_token (out, toks);
-  if (spc)
-    fputs (" ", out);
-  fputs ("\"", out);
-}
-
-static Stmt *
-alloc_stmt (unsigned vis, Token *tokens, Token *end, Token const *sym)
-{
-  static unsigned alloc = 0;
-  static Stmt *heap = 0;
-
-  if (!alloc)
-    {
-      alloc = 1000;
-      heap = XNEWVEC (Stmt, alloc);
-    }
-
-  Stmt *stmt = heap++;
-  alloc--;
-
-  tokens->space = 0;
-  stmt->next = 0;
-  stmt->vis = vis;
-  stmt->tokens = tokens;
-  stmt->len = end - tokens;
-  stmt->sym = sym ? sym - tokens : ~0;
-
-  return stmt;
-}
-
-static Stmt *
-rev_stmts (Stmt *stmt)
-{
-  Stmt *prev = 0;
-  Stmt *next;
-
-  while (stmt)
-    {
-      next = stmt->next;
-      stmt->next = prev;
-      prev = stmt;
-      stmt = next;
-    }
-
-  return prev;
-}
-
-static void
-write_stmt (FILE *out, const Stmt *stmt)
-{
-  if ((stmt->vis & V_mask) != V_comment)
-    {
-      write_tokens (out, stmt->tokens, stmt->len,
-		    (stmt->vis & V_mask) == V_pred);
-      fputs (stmt->vis & V_no_eol ? "\t" : "\n", out);
-    }
-}
-
-static void
-write_stmts (FILE *out, const Stmt *stmts)
-{
-  for (; stmts; stmts = stmts->next)
-    write_stmt (out, stmts);
-}
-
-static Token *
-parse_insn (Token *tok)
-{
-  unsigned depth = 0;
-
-  do
-    {
-      Stmt *stmt;
-      Token *sym = 0;
-      unsigned s = V_insn;
-      Token *start = tok;
-
-      switch (tok++->kind)
-	{
-	case K_comment:
-	  while (tok->kind == K_comment)
-	    tok++;
-	  stmt = alloc_comment (start, tok);
-	  append_stmt (&fns, stmt);
-	  continue;
-
-	case '{':
-	  depth++;
-	  break;
-
-	case '}':
-	  depth--;
-	  break;
-
-	case K_label:
-	  if (tok[-1].ptr[0] != '$')
-	    sym = tok - 1;
-	  tok[-1].end = 1;
-	  s = V_label;
-	  break;
-
-	case '@':
-	  tok->space = 0;
-	  if (tok->kind == '!')
-	    tok++;
-	  if (tok->kind == K_symbol)
-	    sym = tok;
-	  tok++;
-	  s = V_pred;
-	  break;
-
-	default:
-	  for (; tok->kind != ';'; tok++)
-	    {
-	      if (tok->kind == ',')
-		tok[1].space = 0;
-	      else if (tok->kind == K_symbol)
-		sym = tok;
-	    }
-	  tok++->end = 1;
-	  break;
-	}
-
-      stmt = alloc_stmt (s, start, tok, sym);
-      append_stmt (&fns, stmt);
-
-      if (!tok[-1].end && tok[0].kind == K_comment)
-	{
-	  stmt->vis |= V_no_eol;
-	  stmt = alloc_comment (tok, tok + 1);
-	  append_stmt (&fns, stmt);
-	  tok++;
-	}
-    }
-  while (depth);
-
-  return tok;
-}
-
-/* comma separated list of tokens */
-
-static Token *
-parse_list_nosemi (Token *tok)
-{
-  Token *start = tok;
-
-  do
-    if (!(++tok)->kind)
-      break;
-  while ((++tok)->kind == ',');
-
-  tok[-1].end = 1;
-  Stmt *stmt = alloc_stmt (V_dot, start, tok, 0);
-  append_stmt (&decls, stmt);
-
-  return tok;
-}
-
-#define is_keyword(T,S) \
-  (sizeof (S) == (T)->len && !memcmp ((T)->ptr + 1, (S), (T)->len - 1))
-
-static Token *
-parse_init (Token *tok)
-{
-  for (;;)
-    {
-      Token *start = tok;
-      Token const *sym = 0;
-      Stmt *stmt;
-
-      if (tok->kind == K_comment)
-	{
-	  while (tok->kind == K_comment)
-	    tok++;
-	  stmt = alloc_comment (start, tok);
-	  append_stmt (&vars, stmt);
-	  start = tok;
-	}
-
-      if (tok->kind == '{')
-	tok[1].space = 0;
-      for (; tok->kind != ',' && tok->kind != ';'; tok++)
-	if (tok->kind == K_symbol)
-	  sym = tok;
-      tok[1].space = 0;
-      int end = tok++->kind == ';';
-      stmt = alloc_stmt (V_insn, start, tok, sym);
-      append_stmt (&vars, stmt);
-      if (!tok[-1].end && tok->kind == K_comment)
-	{
-	  stmt->vis |= V_no_eol;
-	  stmt = alloc_comment (tok, tok + 1);
-	  append_stmt (&vars, stmt);
-	  tok++;
-	}
-      if (end)
-	break;
-    }
-  return tok;
-}
-
-static Token *
-parse_file (Token *tok)
-{
-  Stmt *comment = 0;
-
-  if (tok->kind == K_comment)
-    {
-      Token *start = tok;
-
-      while (tok->kind == K_comment)
-	{
-	  if (strncmp (tok->ptr, ":VAR_MAP ", 9) == 0)
-	    record_id (tok->ptr + 9, &vars_tail);
-	  if (strncmp (tok->ptr, ":FUNC_MAP ", 10) == 0)
-	    record_id (tok->ptr + 10, &funcs_tail);
-	  tok++;
-	}
-      comment = alloc_comment (start, tok);
-      comment->vis |= V_prefix_comment;
-    }
-
-  if (tok->kind == K_dotted)
-    {
-      if (is_keyword (tok, "version")
-	  || is_keyword (tok, "target")
-	  || is_keyword (tok, "address_size"))
-	{
-	  if (comment)
-	    append_stmt (&decls, comment);
-	  tok = parse_list_nosemi (tok);
-	}
-      else
-	{
-	  unsigned vis = 0;
-	  const Token *def = 0;
-	  unsigned is_decl = 0;
-	  Token *start;
-
-	  for (start = tok;
-	       tok->kind && tok->kind != '=' && tok->kind != K_comment
-		 && tok->kind != '{' && tok->kind != ';'; tok++)
-	    {
-	      if (is_keyword (tok, "global")
-		  || is_keyword (tok, "const"))
-		vis |= V_var;
-	      else if (is_keyword (tok, "func")
-		       || is_keyword (tok, "entry"))
-		vis |= V_func;
-	      else if (is_keyword (tok, "visible"))
-		vis |= V_global;
-	      else if (is_keyword (tok, "extern"))
-		is_decl = 1;
-	      else if (is_keyword (tok, "weak"))
-		vis |= V_weak;
-	      if (tok->kind == '(')
-		{
-		  tok[1].space = 0;
-		  tok[0].space = 1;
-		}
-	      else if (tok->kind == ')' && tok[1].kind != ';')
-		tok[1].space = 1;
-
-	      if (tok->kind == K_symbol)
-		def = tok;
-	    }
-
-	  if (!tok->kind)
-	    {
-	      /* end of file */
-	      if (comment)
-		append_stmt (&fns, comment);
-	    }
-	  else if (tok->kind == '{'
-		   || tok->kind == K_comment)
-	    {
-	      /* function defn */
-	      Stmt *stmt = alloc_stmt (vis, start, tok, def);
-	      if (comment)
-		{
-		  append_stmt (&fns, comment);
-		  stmt->vis |= V_prefix_comment;
-		}
-	      append_stmt (&fns, stmt);
-	      tok = parse_insn (tok);
-	    }
-	  else
-	    {
-	      int assign = tok->kind == '=';
-
-	      tok++->end = 1;
-	      if ((vis & V_mask) == V_var && !is_decl)
-		{
-		  /* variable */
-		  Stmt *stmt = alloc_stmt (vis, start, tok, def);
-		  if (comment)
-		    {
-		      append_stmt (&vars, comment);
-		      stmt->vis |= V_prefix_comment;
-		    }
-		  append_stmt (&vars, stmt);
-		  if (assign)
-		    tok = parse_init (tok);
-		}
-	      else
-		{
-		  /* declaration */
-		  Stmt *stmt = alloc_stmt (vis, start, tok, 0);
-		  if (comment)
-		    {
-		      append_stmt (&decls, comment);
-		      stmt->vis |= V_prefix_comment;
-		    }
-		  append_stmt (&decls, stmt);
-		}
-	    }
-	}
-    }
-  else
-    {
-      /* Something strange.  Ignore it.  */
-      if (comment)
-	append_stmt (&fns, comment);
-
-      do
-	tok++;
-      while (tok->kind && !tok->end);
-    }
-  return tok;
-}
-
 /* Parse STR, saving found tokens into PVALUES and return their number.
    Tokens are assumed to be delimited by ':'.  */
 static unsigned
@@ -840,19 +225,50 @@ access_check (const char *name, int mode)
 static void
 process (FILE *in, FILE *out)
 {
-  const char *input = read_file (in);
-  Token *tok = tokenize (input);
+  size_t len;
+  const char *input = read_file (in, &len);
+
+  fprintf (out, "static const char ptx_code[] = \n \"");
+  for (size_t i = 0; i < len; i++)
+    {
+      char c = input[i];
+      bool nl = false;
+      switch (c)
+	{
+	case '\0':
+	  putc ('\\', out);
+	  c = '0';
+	  break;
+	case '\r':
+	  continue;
+	case '\n':
+	  putc ('\\', out);
+	  c = 'n';
+	  nl = true;
+	  break;
+	case '"':
+	case '\\':
+	  putc ('\\', out);
+	  break;
+
+	case '/':
+	  if (strncmp (input + i, "//:VAR_MAP ", 11) == 0)
+	    record_id (input + i + 11, &vars_tail);
+	  if (strncmp (input + i, "//:FUNC_MAP ", 12) == 0)
+	    record_id (input + i + 12, &funcs_tail);
+	  break;
+
+	default:
+	  break;
+	}
+      putc (c, out);
+      if (nl)
+	fputs ("\"\n\t\"", out);
+    }
+  fprintf (out, "\";\n\n");
+
   unsigned int nvars = 0, nfuncs = 0;
 
-  do
-    tok = parse_file (tok);
-  while (tok->kind);
-
-  fprintf (out, "static const char ptx_code[] = \n");
-  write_stmts (out, rev_stmts (decls));
-  write_stmts (out, rev_stmts (vars));
-  write_stmts (out, rev_stmts (fns));
-  fprintf (out, ";\n\n");
   fprintf (out, "static const char *var_mappings[] = {\n");
   for (id_map *id = var_ids; id; id = id->next, nvars++)
     fprintf (out, "\t\"%s\"%s\n", id->ptx_name, id->next ? "," : "");
@@ -863,8 +279,9 @@ process (FILE *in, FILE *out)
   fprintf (out, "};\n\n");
 
   fprintf (out, "static const void *target_data[] = {\n");
-  fprintf (out, "  ptx_code, (void*) %u, var_mappings, (void*) %u, "
-		"func_mappings\n", nvars, nfuncs);
+  fprintf (out, "  ptx_code, (void *)(__UINTPTR_TYPE__)sizeof (ptx_code),\n");
+  fprintf (out, "  (void *) %u, var_mappings, (void *) %u, func_mappings\n",
+	   nvars, nfuncs);
   fprintf (out, "};\n\n");
 
   fprintf (out, "extern void GOMP_offload_register (const void *, int, void *);\n");
@@ -983,47 +400,74 @@ main (int argc, char **argv)
   obstack_ptr_grow (&argv_obstack, driver);
   obstack_ptr_grow (&argv_obstack, "-xlto");
   obstack_ptr_grow (&argv_obstack, target_ilp32 ? "-m32" : "-m64");
-  obstack_ptr_grow (&argv_obstack, "-S");
+  obstack_ptr_grow (&argv_obstack, "-lgomp");
+  char *collect_mkoffload_opts = getenv ("COLLECT_MKOFFLOAD_OPTIONS");
+  if (collect_mkoffload_opts)
+    {
+      char *str = collect_mkoffload_opts;
+      char *p;
+      while ((p = strchr (str, ' ')) != 0)
+	{
+	  *p = '\0';
+	  obstack_ptr_grow (&argv_obstack, str);
+	  str = p + 1;
+	}
+      obstack_ptr_grow (&argv_obstack, str);
+    }
 
+  bool fopenacc = false;
   for (int ix = 1; ix != argc; ix++)
     {
+      if (!strcmp (argv[ix], "-v"))
+	verbose = true;
+      else if (!strcmp (argv[ix], "-save-temps"))
+	save_temps = true;
+      else if (!strcmp (argv[ix], "-fopenacc"))
+	fopenacc = true;
+
       if (!strcmp (argv[ix], "-o") && ix + 1 != argc)
 	outname = argv[++ix];
       else
 	obstack_ptr_grow (&argv_obstack, argv[ix]);
     }
 
-  ptx_name = make_temp_file (".mkoffload");
-  obstack_ptr_grow (&argv_obstack, "-o");
-  obstack_ptr_grow (&argv_obstack, ptx_name);
-  obstack_ptr_grow (&argv_obstack, NULL);
-  const char **new_argv = XOBFINISH (&argv_obstack, const char **);
-
-  char *execpath = getenv ("GCC_EXEC_PREFIX");
-  char *cpath = getenv ("COMPILER_PATH");
-  char *lpath = getenv ("LIBRARY_PATH");
-  unsetenv ("GCC_EXEC_PREFIX");
-  unsetenv ("COMPILER_PATH");
-  unsetenv ("LIBRARY_PATH");
-
-  fork_execute (new_argv[0], CONST_CAST (char **, new_argv), true);
-  obstack_free (&argv_obstack, NULL);
-
-  xputenv (concat ("GCC_EXEC_PREFIX=", execpath, NULL));
-  xputenv (concat ("COMPILER_PATH=", cpath, NULL));
-  xputenv (concat ("LIBRARY_PATH=", lpath, NULL));
-
-  in = fopen (ptx_name, "r");
-  if (!in)
-    fatal_error (input_location, "cannot open intermediate ptx file");
-
   ptx_cfile_name = make_temp_file (".c");
 
   out = fopen (ptx_cfile_name, "w");
   if (!out)
     fatal_error (input_location, "cannot open '%s'", ptx_cfile_name);
 
-  process (in, out);
+  /* We do not support OMP offloading. Don't generate an offload image
+     if we did not see -fopenacc.  */
+  if (fopenacc)
+    {
+      ptx_name = make_temp_file (".mkoffload");
+      obstack_ptr_grow (&argv_obstack, "-o");
+      obstack_ptr_grow (&argv_obstack, ptx_name);
+      obstack_ptr_grow (&argv_obstack, NULL);
+      const char **new_argv = XOBFINISH (&argv_obstack, const char **);
+
+      char *execpath = getenv ("GCC_EXEC_PREFIX");
+      char *cpath = getenv ("COMPILER_PATH");
+      char *lpath = getenv ("LIBRARY_PATH");
+      unsetenv ("GCC_EXEC_PREFIX");
+      unsetenv ("COMPILER_PATH");
+      unsetenv ("LIBRARY_PATH");
+
+      fork_execute (new_argv[0], CONST_CAST (char **, new_argv), true);
+      obstack_free (&argv_obstack, NULL);
+
+      xputenv (concat ("GCC_EXEC_PREFIX=", execpath, NULL));
+      xputenv (concat ("COMPILER_PATH=", cpath, NULL));
+      xputenv (concat ("LIBRARY_PATH=", lpath, NULL));
+
+      in = fopen (ptx_name, "r");
+      if (!in)
+	fatal_error (input_location, "cannot open intermediate ptx file");
+
+      process (in, out);
+    }
+
   fclose (out);
 
   compile_native (ptx_cfile_name, outname, collect_gcc);
diff --git gcc/doc/invoke.texi gcc/doc/invoke.texi
index 9c8aa99..d3ce92b 100644
--- gcc/doc/invoke.texi
+++ gcc/doc/invoke.texi
@@ -490,7 +490,7 @@ Objective-C and Objective-C++ Dialects}.
 -static-libmpx -static-libmpxwrappers @gol
 -shared -shared-libgcc  -symbolic @gol
 -T @var{script}  -Wl,@var{option}  -Xlinker @var{option} @gol
--u @var{symbol} -z @var{keyword}}
+-Xoffload @var{option} -u @var{symbol} -z @var{keyword}}
 
 @item Directory Options
 @xref{Directory Options,,Options for Directory Search}.
@@ -11404,6 +11404,11 @@ syntax than as separate arguments.  For example, you can specify
 @option{-Xlinker -Map -Xlinker output.map}.  Other linkers may not support
 this syntax for command-line options.
 
+@item -Xoffload @var{option}
+@opindex Xoffload
+Pass @var{option} as an option to the mkoffload program during the linking
+phase.  This program is used to generate images for offloaded code.
+
 @item -Wl,@var{option}
 @opindex Wl
 Pass @var{option} as an option to the linker.  If @var{option} contains
diff --git gcc/fortran/ChangeLog.gomp gcc/fortran/ChangeLog.gomp
index deeefd4..76af137 100644
--- gcc/fortran/ChangeLog.gomp
+++ gcc/fortran/ChangeLog.gomp
@@ -1,3 +1,8 @@
+2015-05-13  Bernd Schmidt  <bernds@codesourcery.com>
+
+	* gfortranspec.c (lang_specific_driver): Add -Xoffload options to
+	link -lm and -lgfortran.
+
 2015-05-11  Thomas Schwinge  <thomas@codesourcery.com>
 	    Bernd Schmidt  <bernds@codesourcery.com>
 	    Cesar Philippidis  <cesar@codesourcery.com>
diff --git gcc/fortran/gfortranspec.c gcc/fortran/gfortranspec.c
index 8af4c76..dd59e1c 100644
--- gcc/fortran/gfortranspec.c
+++ gcc/fortran/gfortranspec.c
@@ -403,6 +403,8 @@ For more information about these matters, see the file named COPYING\n\n"));
 	default:
 	  break;
 	}
+      append_option (OPT_Xoffload, "-lm", 1);
+      append_option (OPT_Xoffload, "-lgfortran", 1);
     }
 
 #ifdef ENABLE_SHARED_LIBGCC
diff --git gcc/gcc.c gcc/gcc.c
index d956c36..c06322f 100644
--- gcc/gcc.c
+++ gcc/gcc.c
@@ -1184,6 +1184,11 @@ static vec<char_p> assembler_options;
    These options are accumulated by -Wp,
    and substituted into the preprocessor command with %Z.  */
 static vec<char_p> preprocessor_options;
+
+/* A vector of options to give to mkoffload.
+   These options are accumulated by -Xoffload and place in the
+   COLLECT_MKOFFLOAD_OPTIONS variable.  */
+static vec<char_p> mkoffload_options;
 
 static char *
 skip_whitespace (char *p)
@@ -3202,6 +3207,7 @@ display_help (void)
   fputs (_("  -Xassembler <arg>        Pass <arg> on to the assembler\n"), stdout);
   fputs (_("  -Xpreprocessor <arg>     Pass <arg> on to the preprocessor\n"), stdout);
   fputs (_("  -Xlinker <arg>           Pass <arg> on to the linker\n"), stdout);
+  fputs (_("  -Xoffload <arg>          Pass <arg> to mkoffload via an environment variable\n"), stdout);
   fputs (_("  -save-temps              Do not delete intermediate files\n"), stdout);
   fputs (_("  -save-temps=<arg>        Do not delete intermediate files\n"), stdout);
   fputs (_("\
@@ -3257,6 +3263,12 @@ add_linker_option (const char *option, int len)
 {
   linker_options.safe_push (save_string (option, len));
 }
+
+static void
+add_mkoffload_option (const char *option, int len)
+{
+  mkoffload_options.safe_push (save_string (option, len));
+}
 
 /* Allocate space for an input file in infiles.  */
 
@@ -3696,6 +3708,11 @@ driver_handle_option (struct gcc_options *opts,
       do_save = false;
       break;
 
+    case OPT_Xoffload:
+      add_mkoffload_option (arg, strlen (arg));
+      do_save = false;
+      break;
+
     case OPT_Xpreprocessor:
       add_preprocessor_option (arg, strlen (arg));
       do_save = false;
@@ -4266,7 +4283,7 @@ process_command (unsigned int decoded_options_count,
     }
 
   gcc_assert (!IS_ABSOLUTE_PATH (tooldir_base_prefix));
-  tooldir_prefix2 = concat (tooldir_base_prefix, spec_host_machine,
+  tooldir_prefix2 = concat (tooldir_base_prefix, spec_machine,
 			    dir_separator_str, NULL);
 
   /* Look for tools relative to the location from which the driver is
@@ -4391,6 +4408,23 @@ set_collect_gcc_options (void)
     }
   obstack_grow (&collect_obstack, "\0", 1);
   xputenv (XOBFINISH (&collect_obstack, char *));
+
+#ifdef ENABLE_OFFLOADING
+  /* Build COLLECT_MKOFFLOAD_OPTIONS to have all of the options specified to
+     mkoffload.  */
+  obstack_grow (&collect_obstack, "COLLECT_MKOFFLOAD_OPTIONS=",
+		sizeof ("COLLECT_MKOFFLOAD_OPTIONS=") - 1);
+
+  char_p opt;
+  FOR_EACH_VEC_ELT (mkoffload_options, i, opt)
+    {
+      if (i > 0)
+	obstack_grow (&collect_obstack, " ", 1);
+      obstack_grow (&collect_obstack, opt, strlen (opt));
+    }
+  obstack_grow (&collect_obstack, "\0", 1);
+  xputenv (XOBFINISH (&collect_obstack, char *));
+#endif
 }
 
 /* Process a spec string, accumulating and running commands.  */
diff --git libgcc/ChangeLog.gomp libgcc/ChangeLog.gomp
index bcc4c67..d872575 100644
--- libgcc/ChangeLog.gomp
+++ libgcc/ChangeLog.gomp
@@ -1,3 +1,15 @@
+2015-05-13  Bernd Schmidt  <bernds@codesourcery.com>
+	    Cesar Philippidis  <cesar@codesourcery.com>
+
+	* config.host (nvptx-*): For an offloading build, add libgomp.a
+	and libgomp.spec to extra_parts.
+	* config/nvptx/t-nvptx (gomp-acc_on_device.o, gomp-tids.o)
+	(gomp-atomic.o, libgomp.a, libgomp.spec): New rules.
+	(OBJS_libgomp): New variable.
+	* config/nvptx/gomp-acc_on_device.c: New file.
+	* config/nvptx/gomp-atomic.asm: Likewise.
+	* config/nvptx/gomp-tids.c: Likewise.
+
 2014-09-08  Thomas Schwinge  <thomas@codesourcery.com>
 
 	* configure.ac (enable_accelerator, offload_targets): Remove.
diff --git libgcc/config.host libgcc/config.host
index d558c38..03cac35 100644
--- libgcc/config.host
+++ libgcc/config.host
@@ -1292,7 +1292,11 @@ mep*-*-*)
 	;;
 nvptx-*)
 	tmake_file="$tmake_file nvptx/t-nvptx"
-	extra_parts="crt0.o"
+	if test "x${enable_as_accelerator_for}" != x; then
+		extra_parts="crt0.o libgomp.a libgomp.spec"
+	else
+		extra_parts="crt0.o"
+	fi
 	;;
 *)
 	echo "*** Configuration ${host} not supported" 1>&2
diff --git libgcc/config/nvptx/gomp-acc_on_device.c libgcc/config/nvptx/gomp-acc_on_device.c
new file mode 100644
index 0000000..e4278f9
--- /dev/null
+++ libgcc/config/nvptx/gomp-acc_on_device.c
@@ -0,0 +1,9 @@
+int acc_on_device(int d)
+{
+  return __builtin_acc_on_device(d);
+}
+
+int acc_on_device_h_(int *d)
+{
+  return acc_on_device(*d);
+}
diff --git libgcc/config/nvptx/gomp-atomic.asm libgcc/config/nvptx/gomp-atomic.asm
new file mode 100644
index 0000000..ae9d925
--- /dev/null
+++ libgcc/config/nvptx/gomp-atomic.asm
@@ -0,0 +1,37 @@
+
+// BEGIN PREAMBLE
+	.version	3.1
+	.target	sm_30
+	.address_size 64
+	.extern .shared .u8 sdata[];
+// END PREAMBLE
+
+// BEGIN VAR DEF: libgomp_ptx_lock
+.global .align 4 .u32 libgomp_ptx_lock;
+
+// BEGIN GLOBAL FUNCTION DECL: GOMP_atomic_start
+.visible .func GOMP_atomic_start;
+// BEGIN GLOBAL FUNCTION DEF: GOMP_atomic_start
+.visible .func GOMP_atomic_start
+{
+	.reg .pred 	%p<2>;
+	.reg .s32 	%r<2>;
+	.reg .s64 	%rd<2>;
+BB5_1:
+	mov.u64 	%rd1, libgomp_ptx_lock;
+	atom.global.cas.b32 	%r1, [%rd1], 0, 1;
+	setp.ne.s32	%p1, %r1, 0;
+	@%p1 bra 	BB5_1;
+	ret;
+	}
+// BEGIN GLOBAL FUNCTION DECL: GOMP_atomic_end
+.visible .func GOMP_atomic_end;
+// BEGIN GLOBAL FUNCTION DEF: GOMP_atomic_end
+.visible .func GOMP_atomic_end
+{
+	.reg .s32 	%r<2>;
+	.reg .s64 	%rd<2>;
+	mov.u64 	%rd1, libgomp_ptx_lock;
+	atom.global.exch.b32 	%r1, [%rd1], 0;
+	ret;
+	}
diff --git libgcc/config/nvptx/gomp-tids.c libgcc/config/nvptx/gomp-tids.c
new file mode 100644
index 0000000..b017b0d
--- /dev/null
+++ libgcc/config/nvptx/gomp-tids.c
@@ -0,0 +1,66 @@
+/* Each gang consists of 'worker' threads.  Each worker has 'vector'
+   threads.
+
+   gang, worker and vector mapping functions:
+
+   *tid (0) => vector dimension
+   *tid (1) => worker dimension
+   *ctaid (0) => gang dimension
+
+   FIXME: these functions assume that the gang, worker and vector parameters
+   are 0 or 1.  To generalize these functions, we should use -1 to indicate,
+   say, that a gang clause was used without its optional argument.  In this
+   case, gang should correspond to ctaid(0), i.e., the num_gangs parameter
+   passed to cuLaunchKernel.
+
+   tid = [0, ntid-1]
+   ntid = [1...threads_per_dimension]
+*/
+
+int __attribute__ ((used))
+GOACC_get_num_threads (int gang, int worker, int vector)
+{
+  int vsize = vector * __builtin_GOACC_ntid (0);
+  int wsize = worker * __builtin_GOACC_ntid (1);
+  int gsize = gang * __builtin_GOACC_nctaid (0);
+  int size = 1;
+
+  if (vector)
+    size *= __builtin_GOACC_ntid (0);
+
+  if (worker)
+    size *= __builtin_GOACC_ntid (1);
+
+  if (gang)
+    size *= __builtin_GOACC_nctaid (0);
+
+  return size;
+}
+
+int __attribute__ ((used))
+GOACC_get_thread_num (int gang, int worker, int vector)
+{
+  int tid = 0;
+  int ws = __builtin_GOACC_ntid (1);
+  int vs = __builtin_GOACC_ntid (0);
+  int gid = __builtin_GOACC_ctaid (0);
+  int wid = __builtin_GOACC_tid (1);
+  int vid = __builtin_GOACC_tid (0);
+
+  if (gang && worker && vector)
+    tid = gid * ws * vs + vs * wid + vid;
+  else if (gang && !worker && vector)
+    tid = vs * gid + vid;
+  else if (gang && worker && !vector)
+    tid = ws * gid + wid;
+  else if (!gang && worker && vector)
+    tid = vs * wid + vid;
+  else if (!gang && !worker && vector)
+    tid = vid;
+  else if (!gang && worker && !vector)
+    tid = wid;
+  else if (gang && !worker && !vector)
+    tid = gid;
+
+  return tid;
+}
diff --git libgcc/config/nvptx/t-nvptx libgcc/config/nvptx/t-nvptx
index 34d68cc..a9e56a9 100644
--- libgcc/config/nvptx/t-nvptx
+++ libgcc/config/nvptx/t-nvptx
@@ -12,3 +12,16 @@ crt0.o: $(srcdir)/config/nvptx/crt0.s
 # support it, and it may cause the build to fail, because of alloca usage, for
 # example.
 INHIBIT_LIBC_CFLAGS = -Dinhibit_libc
+
+gomp-acc_on_device.o: $(srcdir)/config/nvptx/gomp-acc_on_device.c
+	$(gcc_compile) -c -fno-builtin-acc_on_device $<
+gomp-tids.o: $(srcdir)/config/nvptx/gomp-tids.c
+	$(gcc_compile) -c -fopenacc -O $<
+gomp-atomic.o: $(srcdir)/config/nvptx/gomp-atomic.asm
+	cp $< $@
+
+OBJS_libgomp= gomp-acc_on_device.o gomp-tids.o gomp-atomic.o
+libgomp.a: $(OBJS_libgomp)
+	$(AR_CREATE_FOR_TARGET) $@ $(OBJS_libgomp)
+libgomp.spec:
+	echo "*link_gomp: -lgomp" >$@
diff --git libgomp/ChangeLog.gomp libgomp/ChangeLog.gomp
index f20cab3..6ce67c6 100644
--- libgomp/ChangeLog.gomp
+++ libgomp/ChangeLog.gomp
@@ -1,3 +1,19 @@
+2015-05-13  Thomas Schwinge  <thomas@codesourcery.com>
+	    Bernd Schmidt  <bernds@codesourcery.com>
+	    Cesar Philippidis  <cesar@codesourcery.com>
+
+	* oacc-ptx.h: Remove file.
+	* plugin/plugin-nvptx.c: Don't include it.
+	(link_ptx): Accept a length argument.  Don't add predefined bits of
+	PTX code.  Look for NUL characters as file boundaries in the input
+	and link the multiple PTX files.
+	(GOMP_OFFLOAD_load_image): Get the size of PTX code from the table
+	and pass it to link_ptx.
+	* testsuite/libgomp.fortran/fortran.exp (lang_link_flags): Add
+	"-Xoffload -lgfortran -Xoffload -lm".
+	* testsuite/libgomp.oacc-fortran/fortran.exp (lang_link_flags):
+	Likewise.
+
 2015-05-11  Thomas Schwinge  <thomas@codesourcery.com>
 	    Bernd Schmidt  <bernds@codesourcery.com>
 	    Cesar Philippidis  <cesar@codesourcery.com>
diff --git libgomp/oacc-ptx.h libgomp/oacc-ptx.h
deleted file mode 100644
index 104f297..0000000
--- libgomp/oacc-ptx.h
+++ /dev/null
@@ -1,454 +0,0 @@
-/* Copyright (C) 2014-2015 Free Software Foundation, Inc.
-
-   Contributed by Mentor Embedded.
-
-   This file is part of the GNU Offloading and Multi Processing Library
-   (libgomp).
-
-   Libgomp is free software; you can redistribute it and/or modify it
-   under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
-   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-   more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#define ABORT_PTX				\
-  ".version 3.1\n"				\
-  ".target sm_30\n"				\
-  ".address_size 64\n"				\
-  ".visible .func abort;\n"			\
-  ".visible .func abort\n"			\
-  "{\n"						\
-  "trap;\n"					\
-  "ret;\n"					\
-  "}\n"						\
-  ".visible .func _gfortran_abort;\n"		\
-  ".visible .func _gfortran_abort\n"		\
-  "{\n"						\
-  "trap;\n"					\
-  "ret;\n"					\
-  "}\n" \
-
-/* Generated with:
-
-   $ echo 'int acc_on_device(int d) { return __builtin_acc_on_device(d); } int acc_on_device_h_(int *d) { return acc_on_device(*d); }' | accel-gcc/xgcc -Baccel-gcc -x c - -o - -S -m64 -O3 -fno-builtin-acc_on_device -fno-inline
-*/
-#define ACC_ON_DEVICE_PTX						\
-  "        .version        3.1\n"					\
-  "        .target sm_30\n"						\
-  "        .address_size 64\n"						\
-  ".visible .func (.param.u32 %out_retval)acc_on_device(.param.u32 %in_ar1);\n" \
-  ".visible .func (.param.u32 %out_retval)acc_on_device(.param.u32 %in_ar1)\n" \
-  "{\n"									\
-  "        .reg.u32 %ar1;\n"						\
-  ".reg.u32 %retval;\n"							\
-  "        .reg.u64 %hr10;\n"						\
-  "        .reg.u32 %r24;\n"						\
-  "        .reg.u32 %r25;\n"						\
-  "        .reg.pred %r27;\n"						\
-  "        .reg.u32 %r30;\n"						\
-  "        ld.param.u32 %ar1, [%in_ar1];\n"				\
-  "                mov.u32 %r24, %ar1;\n"				\
-  "                setp.ne.u32 %r27,%r24,4;\n"				\
-  "                set.u32.eq.u32 %r30,%r24,5;\n"			\
-  "                neg.s32 %r25, %r30;\n"				\
-  "        @%r27   bra     $L3;\n"					\
-  "                mov.u32 %r25, 1;\n"					\
-  "$L3:\n"								\
-  "                mov.u32 %retval, %r25;\n"				\
-  "        st.param.u32    [%out_retval], %retval;\n"			\
-  "        ret;\n"							\
-  "        }\n"								\
-  ".visible .func (.param.u32 %out_retval)acc_on_device_h_(.param.u64 %in_ar1);\n" \
-  ".visible .func (.param.u32 %out_retval)acc_on_device_h_(.param.u64 %in_ar1)\n" \
-  "{\n"									\
-  "        .reg.u64 %ar1;\n"						\
-  ".reg.u32 %retval;\n"							\
-  "        .reg.u64 %hr10;\n"						\
-  "        .reg.u64 %r25;\n"						\
-  "        .reg.u32 %r26;\n"						\
-  "        .reg.u32 %r27;\n"						\
-  "        ld.param.u64 %ar1, [%in_ar1];\n"				\
-  "                mov.u64 %r25, %ar1;\n"				\
-  "                ld.u32  %r26, [%r25];\n"				\
-  "        {\n"								\
-  "                .param.u32 %retval_in;\n"				\
-  "        {\n"								\
-  "                .param.u32 %out_arg0;\n"				\
-  "                st.param.u32 [%out_arg0], %r26;\n"			\
-  "                call (%retval_in), acc_on_device, (%out_arg0);\n"	\
-  "        }\n"								\
-  "                ld.param.u32    %r27, [%retval_in];\n"		\
-  "}\n"									\
-  "                mov.u32 %retval, %r27;\n"				\
-  "        st.param.u32    [%out_retval], %retval;\n"			\
-  "        ret;\n"							\
-  "        }"
-
- #define GOACC_INTERNAL_PTX						\
-  ".version 3.1\n" \
-  ".target sm_30\n" \
-  ".address_size 64\n" \
-  ".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1);\n" \
-  ".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1);\n" \
-  ".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1);\n" \
-  ".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1);\n" \
-  ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads;\n" \
-  ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num;\n" \
-  ".extern .func abort;\n" \
-  ".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1)\n" \
-  "{\n" \
-  ".reg .u32 %ar1;\n" \
-  ".reg .u32 %retval;\n" \
-  ".reg .u64 %hr10;\n" \
-  ".reg .u32 %r22;\n" \
-  ".reg .u32 %r23;\n" \
-  ".reg .u32 %r24;\n" \
-  ".reg .u32 %r25;\n" \
-  ".reg .u32 %r26;\n" \
-  ".reg .u32 %r27;\n" \
-  ".reg .u32 %r28;\n" \
-  ".reg .u32 %r29;\n" \
-  ".reg .pred %r30;\n" \
-  ".reg .u32 %r31;\n" \
-  ".reg .pred %r32;\n" \
-  ".reg .u32 %r33;\n" \
-  ".reg .pred %r34;\n" \
-  ".local .align 8 .b8 %frame[4];\n" \
-  "ld.param.u32 %ar1,[%in_ar1];\n" \
-  "mov.u32 %r27,%ar1;\n" \
-  "st.local.u32 [%frame],%r27;\n" \
-  "ld.local.u32 %r28,[%frame];\n" \
-  "mov.u32 %r29,1;\n"							\
-  "setp.eq.u32 %r30,%r28,%r29;\n"					\
-  "@%r30 bra $L4;\n"							\
-  "mov.u32 %r31,2;\n"							\
-  "setp.eq.u32 %r32,%r28,%r31;\n"					\
-  "@%r32 bra $L5;\n"							\
-  "mov.u32 %r33,0;\n"							\
-  "setp.eq.u32 %r34,%r28,%r33;\n"					\
-  "@!%r34 bra $L8;\n"							\
-  "mov.u32 %r23,%tid.x;\n"						\
-  "mov.u32 %r22,%r23;\n"						\
-  "bra $L7;\n"								\
-  "$L4:\n"								\
-  "mov.u32 %r24,%tid.y;\n"						\
-  "mov.u32 %r22,%r24;\n"						\
-  "bra $L7;\n"								\
-  "$L5:\n"								\
-  "mov.u32 %r25,%tid.z;\n"						\
-  "mov.u32 %r22,%r25;\n"						\
-  "bra $L7;\n"								\
-  "$L8:\n"								\
-  "{\n"									\
-  "{\n"									\
-  "call abort;\n"							\
-  "}\n"									\
-  "}\n"									\
-  "$L7:\n"								\
-  "mov.u32 %r26,%r22;\n"						\
-  "mov.u32 %retval,%r26;\n"						\
-  "st.param.u32 [%out_retval],%retval;\n"				\
-  "ret;\n"								\
-  "}\n"									\
-  ".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1)\n" \
-  "{\n"									\
-  ".reg .u32 %ar1;\n"							\
-  ".reg .u32 %retval;\n"						\
-  ".reg .u64 %hr10;\n"							\
-  ".reg .u32 %r22;\n"							\
-  ".reg .u32 %r23;\n"							\
-  ".reg .u32 %r24;\n"							\
-  ".reg .u32 %r25;\n"							\
-  ".reg .u32 %r26;\n"							\
-  ".reg .u32 %r27;\n"							\
-  ".reg .u32 %r28;\n"							\
-  ".reg .u32 %r29;\n"							\
-  ".reg .pred %r30;\n"							\
-  ".reg .u32 %r31;\n"							\
-  ".reg .pred %r32;\n"							\
-  ".reg .u32 %r33;\n"							\
-  ".reg .pred %r34;\n"							\
-  ".local .align 8 .b8 %frame[4];\n"					\
-  "ld.param.u32 %ar1,[%in_ar1];\n"					\
-  "mov.u32 %r27,%ar1;\n"						\
-  "st.local.u32 [%frame],%r27;\n"					\
-  "ld.local.u32 %r28,[%frame];\n"					\
-  "mov.u32 %r29,1;\n"							\
-  "setp.eq.u32 %r30,%r28,%r29;\n"					\
-  "@%r30 bra $L11;\n"							\
-  "mov.u32 %r31,2;\n"							\
-  "setp.eq.u32 %r32,%r28,%r31;\n"					\
-  "@%r32 bra $L12;\n"							\
-  "mov.u32 %r33,0;\n"							\
-  "setp.eq.u32 %r34,%r28,%r33;\n"					\
-  "@!%r34 bra $L15;\n"							\
-  "mov.u32 %r23,%ntid.x;\n"						\
-  "mov.u32 %r22,%r23;\n"						\
-  "bra $L14;\n"								\
-  "$L11:\n"								\
-  "mov.u32 %r24,%ntid.y;\n"						\
-  "mov.u32 %r22,%r24;\n"						\
-  "bra $L14;\n"								\
-  "$L12:\n"								\
-  "mov.u32 %r25,%ntid.z;\n"						\
-  "mov.u32 %r22,%r25;\n"						\
-  "bra $L14;\n"								\
-  "$L15:\n"								\
-  "{\n"									\
-  "{\n"									\
-  "call abort;\n"							\
-  "}\n"									\
-  "}\n"									\
-  "$L14:\n"								\
-  "mov.u32 %r26,%r22;\n"						\
-  "mov.u32 %retval,%r26;\n"						\
-  "st.param.u32 [%out_retval],%retval;\n"				\
-  "ret;\n"								\
-  "}\n"									\
-  ".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1)\n" \
-  "{\n"									\
-  ".reg .u32 %ar1;\n"							\
-  ".reg .u32 %retval;\n"						\
-  ".reg .u64 %hr10;\n"							\
-  ".reg .u32 %r22;\n"							\
-  ".reg .u32 %r23;\n"							\
-  ".reg .u32 %r24;\n"							\
-  ".reg .u32 %r25;\n"							\
-  ".reg .u32 %r26;\n"							\
-  ".reg .u32 %r27;\n"							\
-  ".reg .u32 %r28;\n"							\
-  ".reg .u32 %r29;\n"							\
-  ".reg .pred %r30;\n"							\
-  ".reg .u32 %r31;\n"							\
-  ".reg .pred %r32;\n"							\
-  ".reg .u32 %r33;\n"							\
-  ".reg .pred %r34;\n"							\
-  ".local .align 8 .b8 %frame[4];\n"					\
-  "ld.param.u32 %ar1,[%in_ar1];\n"					\
-  "mov.u32 %r27,%ar1;\n"						\
-  "st.local.u32 [%frame],%r27;\n"					\
-  "ld.local.u32 %r28,[%frame];\n"					\
-  "mov.u32 %r29,1;\n"							\
-  "setp.eq.u32 %r30,%r28,%r29;\n"					\
-  "@%r30 bra $L18;\n"							\
-  "mov.u32 %r31,2;\n"							\
-  "setp.eq.u32 %r32,%r28,%r31;\n"					\
-  "@%r32 bra $L19;\n"							\
-  "mov.u32 %r33,0;\n"							\
-  "setp.eq.u32 %r34,%r28,%r33;\n"					\
-  "@!%r34 bra $L22;\n"							\
-  "mov.u32 %r23,%ctaid.x;\n"						\
-  "mov.u32 %r22,%r23;\n"						\
-  "bra $L21;\n"								\
-  "$L18:\n"								\
-  "mov.u32 %r24,%ctaid.y;\n"						\
-  "mov.u32 %r22,%r24;\n"						\
-  "bra $L21;\n"								\
-  "$L19:\n"								\
-  "mov.u32 %r25,%ctaid.z;\n"						\
-  "mov.u32 %r22,%r25;\n"						\
-  "bra $L21;\n"								\
-  "$L22:\n"								\
-  "{\n"									\
-  "{\n"									\
-  "call abort;\n"							\
-  "}\n"									\
-  "}\n"									\
-  "$L21:\n"								\
-  "mov.u32 %r26,%r22;\n"						\
-  "mov.u32 %retval,%r26;\n"						\
-  "st.param.u32 [%out_retval],%retval;\n"				\
-  "ret;\n"								\
-  "}\n"									\
-  ".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1)\n" \
-  "{\n"									\
-  ".reg .u32 %ar1;\n"							\
-  ".reg .u32 %retval;\n"						\
-  ".reg .u64 %hr10;\n"							\
-  ".reg .u32 %r22;\n"							\
-  ".reg .u32 %r23;\n"							\
-  ".reg .u32 %r24;\n"							\
-  ".reg .u32 %r25;\n"							\
-  ".reg .u32 %r26;\n"							\
-  ".reg .u32 %r27;\n"							\
-  ".reg .u32 %r28;\n"							\
-  ".reg .u32 %r29;\n"							\
-  ".reg .pred %r30;\n"							\
-  ".reg .u32 %r31;\n"							\
-  ".reg .pred %r32;\n"							\
-  ".reg .u32 %r33;\n"							\
-  ".reg .pred %r34;\n"							\
-  ".local .align 8 .b8 %frame[4];\n"					\
-  "ld.param.u32 %ar1,[%in_ar1];\n"					\
-  "mov.u32 %r27,%ar1;\n"						\
-  "st.local.u32 [%frame],%r27;\n"					\
-  "ld.local.u32 %r28,[%frame];\n"					\
-  "mov.u32 %r29,1;\n"							\
-  "setp.eq.u32 %r30,%r28,%r29;\n"					\
-  "@%r30 bra $L25;\n"							\
-  "mov.u32 %r31,2;\n"							\
-  "setp.eq.u32 %r32,%r28,%r31;\n"					\
-  "@%r32 bra $L26;\n"							\
-  "mov.u32 %r33,0;\n"							\
-  "setp.eq.u32 %r34,%r28,%r33;\n"					\
-  "@!%r34 bra $L29;\n"							\
-  "mov.u32 %r23,%nctaid.x;\n"						\
-  "mov.u32 %r22,%r23;\n"						\
-  "bra $L28;\n"								\
-  "$L25:\n"								\
-  "mov.u32 %r24,%nctaid.y;\n"						\
-  "mov.u32 %r22,%r24;\n"						\
-  "bra $L28;\n"								\
-  "$L26:\n"								\
-  "mov.u32 %r25,%nctaid.z;\n"						\
-  "mov.u32 %r22,%r25;\n"						\
-  "bra $L28;\n"								\
-  "$L29:\n"								\
-  "{\n"									\
-  "{\n"									\
-  "call abort;\n"							\
-  "}\n"									\
-  "}\n"									\
-  "$L28:\n"								\
-  "mov.u32 %r26,%r22;\n"						\
-  "mov.u32 %retval,%r26;\n"						\
-  "st.param.u32 [%out_retval],%retval;\n"				\
-  "ret;\n"								\
-  "}\n"									\
-  ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads\n"	\
-  "{\n"									\
-  ".reg .u32 %retval;\n"						\
-  ".reg .u64 %hr10;\n"							\
-  ".reg .u32 %r22;\n"							\
-  ".reg .u32 %r23;\n"							\
-  ".reg .u32 %r24;\n"							\
-  ".reg .u32 %r25;\n"							\
-  ".reg .u32 %r26;\n"							\
-  ".reg .u32 %r27;\n"							\
-  ".reg .u32 %r28;\n"							\
-  ".reg .u32 %r29;\n"							\
-  "mov.u32 %r26,0;\n"							\
-  "{\n"									\
-  ".param .u32 %retval_in;\n"						\
-  "{\n"									\
-  ".param .u32 %out_arg0;\n"						\
-  "st.param.u32 [%out_arg0],%r26;\n"					\
-  "call (%retval_in),GOACC_ntid,(%out_arg0);\n"				\
-  "}\n"									\
-  "ld.param.u32 %r27,[%retval_in];\n"					\
-  "}\n"									\
-  "mov.u32 %r22,%r27;\n"						\
-  "mov.u32 %r28,0;\n"							\
-  "{\n"									\
-  ".param .u32 %retval_in;\n"						\
-  "{\n"									\
-  ".param .u32 %out_arg0;\n"						\
-  "st.param.u32 [%out_arg0],%r28;\n"					\
-  "call (%retval_in),GOACC_nctaid,(%out_arg0);\n"			\
-  "}\n"									\
-  "ld.param.u32 %r29,[%retval_in];\n"					\
-  "}\n"									\
-  "mov.u32 %r23,%r29;\n"						\
-  "mul.lo.u32 %r24,%r22,%r23;\n"					\
-  "mov.u32 %r25,%r24;\n"						\
-  "mov.u32 %retval,%r25;\n"						\
-  "st.param.u32 [%out_retval],%retval;\n"				\
-  "ret;\n"								\
-  "}\n"									\
-  ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num\n"	\
-  "{\n"									\
-  ".reg .u32 %retval;\n"						\
-  ".reg .u64 %hr10;\n"							\
-  ".reg .u32 %r22;\n"							\
-  ".reg .u32 %r23;\n"							\
-  ".reg .u32 %r24;\n"							\
-  ".reg .u32 %r25;\n"							\
-  ".reg .u32 %r26;\n"							\
-  ".reg .u32 %r27;\n"							\
-  ".reg .u32 %r28;\n"							\
-  ".reg .u32 %r29;\n"							\
-  ".reg .u32 %r30;\n"							\
-  ".reg .u32 %r31;\n"							\
-  ".reg .u32 %r32;\n"							\
-  ".reg .u32 %r33;\n"							\
-  "mov.u32 %r28,0;\n"							\
-  "{\n"									\
-  ".param .u32 %retval_in;\n"						\
-  "{\n"									\
-  ".param .u32 %out_arg0;\n"						\
-  "st.param.u32 [%out_arg0],%r28;\n"					\
-  "call (%retval_in),GOACC_ntid,(%out_arg0);\n"				\
-  "}\n"									\
-  "ld.param.u32 %r29,[%retval_in];\n"					\
-  "}\n"									\
-  "mov.u32 %r22,%r29;\n"						\
-  "mov.u32 %r30,0;\n"							\
-  "{\n"									\
-  ".param .u32 %retval_in;\n"						\
-  "{\n"									\
-  ".param .u32 %out_arg0;\n"						\
-  "st.param.u32 [%out_arg0],%r30;\n"					\
-  "call (%retval_in),GOACC_ctaid,(%out_arg0);\n"			\
-  "}\n"									\
-  "ld.param.u32 %r31,[%retval_in];\n"					\
-  "}\n"									\
-  "mov.u32 %r23,%r31;\n"						\
-  "mul.lo.u32 %r24,%r22,%r23;\n"					\
-  "mov.u32 %r32,0;\n"							\
-  "{\n"									\
-  ".param .u32 %retval_in;\n"						\
-  "{\n"									\
-  ".param .u32 %out_arg0;\n"						\
-  "st.param.u32 [%out_arg0],%r32;\n"					\
-  "call (%retval_in),GOACC_tid,(%out_arg0);\n"				\
-  "}\n"									\
-  "ld.param.u32 %r33,[%retval_in];\n"					\
-  "}\n"									\
-  "mov.u32 %r25,%r33;\n"						\
-  "add.u32 %r26,%r24,%r25;\n"						\
-  "mov.u32 %r27,%r26;\n"						\
-  "mov.u32 %retval,%r27;\n"						\
-  "st.param.u32 [%out_retval],%retval;\n"				\
-  "ret;\n"								\
-  "}\n"
-
- #define GOMP_ATOMIC_PTX \
-  ".version 3.1\n" \
-  ".target sm_30\n" \
-  ".address_size 64\n" \
-  ".global .align 4 .u32 libgomp_ptx_lock;\n" \
-  ".visible .func GOMP_atomic_start;\n" \
-  ".visible .func GOMP_atomic_start\n" \
-  "{\n" \
-  "  .reg .pred    %p<2>;\n" \
-  "  .reg .s32     %r<2>;\n" \
-  "  .reg .s64     %rd<2>;\n" \
-  "BB5_1:\n" \
-  "  mov.u64       %rd1, libgomp_ptx_lock;\n" \
-  "  atom.global.cas.b32   %r1, [%rd1], 0, 1;\n" \
-  "  setp.ne.s32   %p1, %r1, 0;\n" \
-  "  @%p1 bra      BB5_1;\n" \
-  "  ret;\n" \
-  "}\n" \
-  ".visible .func GOMP_atomic_end;\n" \
-  ".visible .func GOMP_atomic_end\n" \
-  "{\n" \
-  "  .reg .s32     %r<2>;\n" \
-  "  .reg .s64     %rd<2>;\n" \
-  "  mov.u64       %rd1, libgomp_ptx_lock;\n" \
-  "  atom.global.exch.b32  %r1, [%rd1], 0;\n" \
-  "  ret;\n" \
-  "}\n"
diff --git libgomp/plugin/plugin-nvptx.c libgomp/plugin/plugin-nvptx.c
index ad1163d..7d34b9c 100644
--- libgomp/plugin/plugin-nvptx.c
+++ libgomp/plugin/plugin-nvptx.c
@@ -34,7 +34,6 @@
 #include "openacc.h"
 #include "config.h"
 #include "libgomp-plugin.h"
-#include "oacc-ptx.h"
 #include "oacc-plugin.h"
 
 #include <pthread.h>
@@ -793,7 +792,7 @@ nvptx_get_num_devices (void)
 
 
 static void
-link_ptx (CUmodule *module, char *ptx_code)
+link_ptx (CUmodule *module, char *ptx_code, size_t length)
 {
   CUjit_option opts[7];
   void *optvals[7];
@@ -834,63 +833,38 @@ link_ptx (CUmodule *module, char *ptx_code)
   if (r != CUDA_SUCCESS)
     GOMP_PLUGIN_fatal ("cuLinkCreate error: %s", cuda_error (r));
 
-  char *abort_ptx = ABORT_PTX;
-  r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, abort_ptx,
-		     strlen (abort_ptx) + 1, 0, 0, 0, 0);
-  if (r != CUDA_SUCCESS)
-    {
-      GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
-      GOMP_PLUGIN_fatal ("cuLinkAddData (abort) error: %s", cuda_error (r));
-    }
-
-  char *acc_on_device_ptx = ACC_ON_DEVICE_PTX;
-  r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, acc_on_device_ptx,
-		     strlen (acc_on_device_ptx) + 1, 0, 0, 0, 0);
-  if (r != CUDA_SUCCESS)
-    {
-      GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
-      GOMP_PLUGIN_fatal ("cuLinkAddData (acc_on_device) error: %s",
-			 cuda_error (r));
-    }
-
-  char *goacc_internal_ptx = GOACC_INTERNAL_PTX;
-  r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, goacc_internal_ptx,
-		     strlen (goacc_internal_ptx) + 1, 0, 0, 0, 0);
-  if (r != CUDA_SUCCESS)
-    {
-      GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
-      GOMP_PLUGIN_fatal ("cuLinkAddData (goacc_internal_ptx) error: %s",
-			 cuda_error (r));
-    }
-
-  char *gomp_atomic_ptx = GOMP_ATOMIC_PTX;
-  r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, gomp_atomic_ptx,
-		     strlen (gomp_atomic_ptx) + 1, 0, 0, 0, 0);
-  if (r != CUDA_SUCCESS)
-    {
-      GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
-      GOMP_PLUGIN_fatal ("cuLinkAddData (gomp_atomic_ptx) error: %s",
-			 cuda_error (r));
-    }
-
-  r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, ptx_code,
-              strlen (ptx_code) + 1, 0, 0, 0, 0);
-  if (r != CUDA_SUCCESS)
-    {
-      GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
-      GOMP_PLUGIN_fatal ("cuLinkAddData (ptx_code) error: %s", cuda_error (r));
+  size_t off = 0;
+  while (off < length)
+    {
+      int l = strlen (ptx_code + off);
+      r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, ptx_code + off, l + 1,
+			 0, 0, 0, 0);
+      if (r != CUDA_SUCCESS)
+	{
+	  GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
+	  GOMP_PLUGIN_fatal ("cuLinkAddData (ptx_code) error: %s", cuda_error (r));
+	}
+
+      off += l;
+      while (off < length && ptx_code[off] == '\0')
+	off++;
     }
 
   r = cuLinkComplete (linkstate, &linkout, &linkoutsize);
-  if (r != CUDA_SUCCESS)
-    GOMP_PLUGIN_fatal ("cuLinkComplete error: %s", cuda_error (r));
 
   GOMP_PLUGIN_debug (0, "Link complete: %fms\n", elapsed);
   GOMP_PLUGIN_debug (0, "Link log %s\n", &ilog[0]);
 
+  if (r != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuLinkComplete error: %s", cuda_error (r));
+
   r = cuModuleLoadData (module, linkout);
   if (r != CUDA_SUCCESS)
     GOMP_PLUGIN_fatal ("cuModuleLoadData error: %s", cuda_error (r));
+
+  r = cuLinkDestroy (linkstate);
+  if (r != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuLinkDestory error: %s", cuda_error (r));
 }
 
 static void
@@ -1633,7 +1607,7 @@ GOMP_OFFLOAD_load_image (int ord, void *target_data,
 
   nvptx_attach_host_thread_to_device (ord);
 
-  link_ptx (&module, img_header[0]);
+  link_ptx (&module, img_header[0], (size_t) img_header[1]);
 
   pthread_mutex_lock (&ptx_image_lock);
   new_image = GOMP_PLUGIN_malloc (sizeof (struct ptx_image_data));
@@ -1647,18 +1621,19 @@ GOMP_OFFLOAD_load_image (int ord, void *target_data,
      each offload image:
 
      img_header[0] -> ptx code
-     img_header[1] -> number of variables
-     img_header[2] -> array of variable names (pointers to strings)
-     img_header[3] -> number of kernels
-     img_header[4] -> array of kernel names (pointers to strings)
+     img_header[1] -> size of ptx code
+     img_header[2] -> number of variables
+     img_header[3] -> array of variable names (pointers to strings)
+     img_header[4] -> number of kernels
+     img_header[5] -> array of kernel names (pointers to strings)
 
      The array of kernel names and the functions addresses form a
      one-to-one correspondence.  */
 
-  var_entries = (uintptr_t) img_header[1];
-  var_names = (char **) img_header[2];
-  fn_entries = (uintptr_t) img_header[3];
-  fn_names = (char **) img_header[4];
+  var_entries = (uintptr_t) img_header[2];
+  var_names = (char **) img_header[3];
+  fn_entries = (uintptr_t) img_header[4];
+  fn_names = (char **) img_header[5];
 
   *target_table = GOMP_PLUGIN_malloc (sizeof (struct addr_pair)
 				      * (fn_entries + var_entries));
diff --git libgomp/testsuite/libgomp.fortran/fortran.exp libgomp/testsuite/libgomp.fortran/fortran.exp
index 9e6b643..f684abc 100644
--- libgomp/testsuite/libgomp.fortran/fortran.exp
+++ libgomp/testsuite/libgomp.fortran/fortran.exp
@@ -7,7 +7,10 @@ global ALWAYS_CFLAGS
 
 set shlib_ext [get_shlib_extension]
 set lang_library_path	"../libgfortran/.libs"
-set lang_link_flags	"-lgfortran"
+#TODO
+# We're not using the gfortran driver, so have to mimic its behavior
+# here.
+set lang_link_flags	"-lgfortran -Xoffload -lgfortran -Xoffload -lm"
 if [info exists lang_include_flags] then {
     unset lang_include_flags
 }
diff --git libgomp/testsuite/libgomp.oacc-fortran/fortran.exp libgomp/testsuite/libgomp.oacc-fortran/fortran.exp
index a68e039..11655a1 100644
--- libgomp/testsuite/libgomp.oacc-fortran/fortran.exp
+++ libgomp/testsuite/libgomp.oacc-fortran/fortran.exp
@@ -9,7 +9,10 @@ global ALWAYS_CFLAGS
 
 set shlib_ext [get_shlib_extension]
 set lang_library_path	"../libgfortran/.libs"
-set lang_link_flags	"-lgfortran"
+#TODO
+# We're not using the gfortran driver, so have to mimic its behavior
+# here.
+set lang_link_flags	"-lgfortran -Xoffload -lgfortran -Xoffload -lm"
 if [info exists lang_include_flags] then {
     unset lang_include_flags
 }


Grüße,
 Thomas

Attachment: pgpUSmX0MwMtp.pgp
Description: PGP signature


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]