[gomp3] unsigned, long long and pointer omp for loops


Hi!

OpenMP 3.0 allows #pragma omp for iteration variables to be unsigned; in
C++, random access iterators include pointers, and, new since the draft,
the final standard also allows pointer iterators in C.
The following patch adds support for all of these iterators, in normal
as well as collapsed omp for loops.
Unsigned variables can't always be handled just by biasing the unsigned
n1/n2 into a signed type, and for unsigned types the "sign" of the
increment can't be used as a flag for whether it is a < or a > loop.
On 32-bit arches long long iterators weren't supported either.
This patch therefore adds unsigned long long iterator APIs to libgomp
in addition to the current long iterator APIs; signed long long on
32-bit arches is handled by biasing.
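
To illustrate the kinds of loops this enables (a minimal sketch, not
taken from the patch; bar, baz and qux are placeholder functions):

extern void bar (unsigned int);
extern void baz (char *);
extern void qux (unsigned long long);

void
foo (char *p, char *q, unsigned long long n)
{
  unsigned int i;
  char *r;
  unsigned long long j;

#pragma omp parallel for
  for (i = 0; i < 64; i++)	/* unsigned iterator, no longer warns */
    bar (i);

#pragma omp parallel for
  for (r = p; r < q; r++)	/* pointer iterator, new in final 3.0 */
    baz (r);

#pragma omp parallel for
  for (j = 0; j < n; j++)	/* unsigned long long, now on 32-bit too */
    qux (j);
}

Internally the unsigned long long cases go through the new
GOMP_loop_ull_* entry points, which take an explicit boolean "up"
argument (true for < loops) since the increment's sign no longer
identifies the direction; signed long long bounds are biased by
LLONG_MIN where needed before being passed through.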

Regtested on x86_64-linux, both --target_board=unix/-m64 and
--target_board=unix/-m32.

2008-05-15  Jakub Jelinek  <jakub@redhat.com>

	* c-parser.c (c_parser_omp_for_loop): Call
	default_function_array_conversion on init.
	* builtin-types.def (BT_PTR_ULONGLONG,
	BT_FN_BOOL_ULONGLONGPTR_ULONGLONGPTR,
	BT_FN_BOOL_BOOL_ULL_ULL_ULL_ULLPTR_ULLPTR,
	BT_FN_BOOL_BOOL_ULL_ULL_ULL_ULL_ULLPTR_ULLPTR): New.
	* omp-builtins.def (BUILT_IN_GOMP_LOOP_ULL_STATIC_START,
	BUILT_IN_GOMP_LOOP_ULL_DYNAMIC_START,
	BUILT_IN_GOMP_LOOP_ULL_GUIDED_START,
	BUILT_IN_GOMP_LOOP_ULL_AUTO_START,
	BUILT_IN_GOMP_LOOP_ULL_RUNTIME_START,
	BUILT_IN_GOMP_LOOP_ULL_ORDERED_STATIC_START,
	BUILT_IN_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START,
	BUILT_IN_GOMP_LOOP_ULL_ORDERED_GUIDED_START,
	BUILT_IN_GOMP_LOOP_ULL_ORDERED_AUTO_START,
	BUILT_IN_GOMP_LOOP_ULL_ORDERED_RUNTIME_START,
	BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT,
	BUILT_IN_GOMP_LOOP_ULL_DYNAMIC_NEXT,
	BUILT_IN_GOMP_LOOP_ULL_GUIDED_NEXT,
	BUILT_IN_GOMP_LOOP_ULL_AUTO_NEXT,
	BUILT_IN_GOMP_LOOP_ULL_RUNTIME_NEXT,
	BUILT_IN_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT,
	BUILT_IN_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT,
	BUILT_IN_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT,
	BUILT_IN_GOMP_LOOP_ULL_ORDERED_AUTO_NEXT,
	BUILT_IN_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT): New builtins.
	* gimplify.c (gimplify_omp_for): Allow pointer type
	for decl, handle POINTER_PLUS_EXPR.
	* omp-low.c (struct omp_for_data): Add iter_type field.
	(extract_omp_for_data): Handle pointer and unsigned iterators.
	Compute fd->iter_type.  Handle POINTER_PLUS_EXPR increments.
	(workshare_safe_to_combine_p): Disallow combined for if
	iter_type is unsigned long long.
	(expand_omp_for_generic): Handle pointer, unsigned and long long
	iterators.
	(expand_omp_for_static_nochunk, expand_omp_for_static_chunk):
	Likewise.
	(expand_omp_for): Use GOMP_loop_ull*{start,next} if iter_type
	is unsigned long long.
	* c-omp.c (c_finish_omp_for): Allow pointer iterators.  Remove
	warning about unsigned iterators.
cp/
	* semantics.c (finish_omp_for): Allow pointer iterators.
fortran/
	* types.def (BT_ULONGLONG, BT_PTR_ULONGLONG,
	BT_FN_BOOL_ULONGLONGPTR_ULONGLONGPTR,
	BT_FN_BOOL_BOOL_ULL_ULL_ULL_ULLPTR_ULLPTR,
	BT_FN_BOOL_BOOL_ULL_ULL_ULL_ULL_ULLPTR_ULLPTR):
	New.
testsuite/
	* gcc.dg/gomp/pr27499.c (foo): Remove "is unsigned" dg-warning.
	* g++.dg/gomp/pr27499.C (foo): Likewise.
	* g++.dg/gomp/for-16.C (foo): Likewise.
libgomp/
	* libgomp_g.h (GOMP_loop_ull_static_start, GOMP_loop_ull_dynamic_start,
	GOMP_loop_ull_guided_start, GOMP_loop_ull_runtime_start,
	GOMP_loop_ull_ordered_static_start,
	GOMP_loop_ull_ordered_dynamic_start,
	GOMP_loop_ull_ordered_guided_start,
	GOMP_loop_ull_ordered_runtime_start, GOMP_loop_ull_static_next,
	GOMP_loop_ull_dynamic_next, GOMP_loop_ull_guided_next,
	GOMP_loop_ull_runtime_next, GOMP_loop_ull_ordered_static_next,
	GOMP_loop_ull_ordered_dynamic_next, GOMP_loop_ull_ordered_guided_next,
	GOMP_loop_ull_ordered_runtime_next): New prototypes.
	* libgomp.h (struct work_share): Move chunk_size, end, incr into
	transparent union/struct, add chunk_size_ull, end_ull, incr_ull and
	next_ull fields.
	(gomp_iter_ull_static_next, gomp_iter_ull_dynamic_next_locked,
	gomp_iter_ull_guided_next_locked, gomp_iter_ull_dynamic_next,
	gomp_iter_ull_guided_next): New prototypes.
	* Makefile.am (libgomp_la_SOURCES): Add loop_ull.c and iter_ull.c.
	* Makefile.in: Regenerated.
	* loop_ull.c: New file.
	* iter_ull.c: New file.
	* libgomp.map (GOMP_loop_ordered_dynamic_first,
	GOMP_loop_ordered_guided_first, GOMP_loop_ordered_runtime_first,
	GOMP_loop_ordered_static_first): Remove.
	(GOMP_loop_ull_dynamic_next, GOMP_loop_ull_dynamic_start,
	GOMP_loop_ull_guided_next, GOMP_loop_ull_guided_start,
	GOMP_loop_ull_ordered_dynamic_next,
	GOMP_loop_ull_ordered_dynamic_start,
	GOMP_loop_ull_ordered_guided_next,
	GOMP_loop_ull_ordered_guided_start,
	GOMP_loop_ull_ordered_runtime_next,
	GOMP_loop_ull_ordered_runtime_start,
	GOMP_loop_ull_ordered_static_next,
	GOMP_loop_ull_ordered_static_start,
	GOMP_loop_ull_runtime_next, GOMP_loop_ull_runtime_start,
	GOMP_loop_ull_static_next, GOMP_loop_ull_static_start): Export
	@@GOMP_2.0.

	* testsuite/libgomp.c/loop-5.c: New test.
	* testsuite/libgomp.c/loop-6.c: New test.
	* testsuite/libgomp.c/loop-7.c: New test.
	* testsuite/libgomp.c++/loop-8.C: New test.
	* testsuite/libgomp.c++/loop-9.C: New test.
	* testsuite/libgomp.c++/loop-10.C: New test.

	* env.c (omp_set_schedule): For omp_sched_static, set modifier to
	0 if it was smaller than 1.
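
A minimal sketch of the intended user-visible behavior of that env.c
change (assuming the standard omp.h API):

#include <omp.h>

void
pick_schedule (void)
{
  /* A modifier smaller than 1 now means the default static
     partitioning (chunk size 0) rather than a nonsensical chunk.  */
  omp_set_schedule (omp_sched_static, -1);
}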

--- gcc/c-parser.c.jj	2008-04-01 10:41:23.000000000 +0200
+++ gcc/c-parser.c	2008-05-15 17:07:30.000000000 +0200
@@ -7658,12 +7658,15 @@ c_parser_omp_for_loop (c_parser *parser,
       else if (c_parser_next_token_is (parser, CPP_NAME)
 	       && c_parser_peek_2nd_token (parser)->type == CPP_EQ)
 	{
+	  struct c_expr init_exp;
+
 	  decl = c_parser_postfix_expression (parser).value;
 
 	  c_parser_require (parser, CPP_EQ, "expected %<=%>");
 
-	  init = c_parser_expr_no_commas (parser, NULL).value;
-	  init = build_modify_expr (decl, NOP_EXPR, init);
+	  init_exp = c_parser_expr_no_commas (parser, NULL);
+	  init_exp = default_function_array_conversion (init_exp);
+	  init = build_modify_expr (decl, NOP_EXPR, init_exp.value);
 	  init = c_process_expr_stmt (init);
 
 	  c_parser_skip_until_found (parser, CPP_SEMICOLON, "expected %<;%>");
--- gcc/builtin-types.def.jj	2008-05-14 08:39:30.000000000 +0200
+++ gcc/builtin-types.def	2008-05-14 16:44:33.000000000 +0200
@@ -121,6 +121,7 @@ DEF_PRIMITIVE_TYPE (BT_I16, builtin_type
 
 DEF_POINTER_TYPE (BT_PTR_CONST_STRING, BT_CONST_STRING)
 DEF_POINTER_TYPE (BT_PTR_LONG, BT_LONG)
+DEF_POINTER_TYPE (BT_PTR_ULONGLONG, BT_ULONGLONG)
 DEF_POINTER_TYPE (BT_PTR_PTR, BT_PTR)
 
 DEF_FUNCTION_TYPE_0 (BT_FN_VOID, BT_VOID)
@@ -308,6 +309,8 @@ DEF_FUNCTION_TYPE_2 (BT_FN_I8_VPTR_I8, B
 DEF_FUNCTION_TYPE_2 (BT_FN_I16_VPTR_I16, BT_I16, BT_VOLATILE_PTR, BT_I16)
 DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_LONGPTR_LONGPTR,
 		     BT_BOOL, BT_PTR_LONG, BT_PTR_LONG)
+DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_ULONGLONGPTR_ULONGLONGPTR,
+		     BT_BOOL, BT_PTR_ULONGLONG, BT_PTR_ULONGLONG)
 
 DEF_POINTER_TYPE (BT_PTR_FN_VOID_PTR_PTR, BT_FN_VOID_PTR_PTR)
 
@@ -412,6 +415,9 @@ DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_LONG_LON
 DEF_FUNCTION_TYPE_6 (BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG,
 		     BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT,
 		     BT_LONG, BT_LONG, BT_LONG)
+DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_BOOL_ULL_ULL_ULL_ULLPTR_ULLPTR,
+		     BT_BOOL, BT_BOOL, BT_ULONGLONG, BT_ULONGLONG,
+		     BT_ULONGLONG, BT_PTR_ULONGLONG, BT_PTR_ULONGLONG)
 
 DEF_FUNCTION_TYPE_7 (BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG_LONG,
 		     BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT,
@@ -420,6 +426,10 @@ DEF_FUNCTION_TYPE_7 (BT_FN_VOID_OMPFN_PT
 		     BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR,
 		     BT_PTR_FN_VOID_PTR_PTR, BT_LONG, BT_LONG,
 		     BT_BOOL, BT_UINT)
+DEF_FUNCTION_TYPE_7 (BT_FN_BOOL_BOOL_ULL_ULL_ULL_ULL_ULLPTR_ULLPTR,
+		     BT_BOOL, BT_BOOL, BT_ULONGLONG, BT_ULONGLONG,
+		     BT_ULONGLONG, BT_ULONGLONG,
+		     BT_PTR_ULONGLONG, BT_PTR_ULONGLONG)
 
 DEF_FUNCTION_TYPE_VAR_0 (BT_FN_VOID_VAR, BT_VOID)
 DEF_FUNCTION_TYPE_VAR_0 (BT_FN_INT_VAR, BT_INT)
--- gcc/omp-builtins.def.jj	2008-05-14 08:39:30.000000000 +0200
+++ gcc/omp-builtins.def	2008-05-14 16:18:36.000000000 +0200
@@ -115,6 +115,71 @@ DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ORD
 DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ORDERED_RUNTIME_NEXT,
 		  "GOMP_loop_ordered_runtime_next",
 		  BT_FN_BOOL_LONGPTR_LONGPTR, ATTR_NOTHROW_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ULL_STATIC_START,
+		  "GOMP_loop_ull_static_start",
+		  BT_FN_BOOL_BOOL_ULL_ULL_ULL_ULL_ULLPTR_ULLPTR,
+		  ATTR_NOTHROW_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ULL_DYNAMIC_START,
+		  "GOMP_loop_ull_dynamic_start",
+		  BT_FN_BOOL_BOOL_ULL_ULL_ULL_ULL_ULLPTR_ULLPTR,
+		  ATTR_NOTHROW_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ULL_GUIDED_START,
+		  "GOMP_loop_ull_guided_start",
+		  BT_FN_BOOL_BOOL_ULL_ULL_ULL_ULL_ULLPTR_ULLPTR,
+		  ATTR_NOTHROW_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ULL_AUTO_START,
+		  "GOMP_loop_ull_auto_start",
+		  BT_FN_BOOL_BOOL_ULL_ULL_ULL_ULLPTR_ULLPTR,
+		  ATTR_NOTHROW_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ULL_RUNTIME_START,
+		  "GOMP_loop_ull_runtime_start",
+		  BT_FN_BOOL_BOOL_ULL_ULL_ULL_ULLPTR_ULLPTR,
+		  ATTR_NOTHROW_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ULL_ORDERED_STATIC_START,
+		  "GOMP_loop_ull_ordered_static_start",
+		  BT_FN_BOOL_BOOL_ULL_ULL_ULL_ULL_ULLPTR_ULLPTR,
+		  ATTR_NOTHROW_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START,
+		  "GOMP_loop_ull_ordered_dynamic_start",
+		  BT_FN_BOOL_BOOL_ULL_ULL_ULL_ULL_ULLPTR_ULLPTR,
+		  ATTR_NOTHROW_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ULL_ORDERED_GUIDED_START,
+		  "GOMP_loop_ull_ordered_guided_start",
+		  BT_FN_BOOL_BOOL_ULL_ULL_ULL_ULL_ULLPTR_ULLPTR,
+		  ATTR_NOTHROW_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ULL_ORDERED_AUTO_START,
+		  "GOMP_loop_ull_ordered_auto_start",
+		  BT_FN_BOOL_BOOL_ULL_ULL_ULL_ULLPTR_ULLPTR,
+		  ATTR_NOTHROW_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ULL_ORDERED_RUNTIME_START,
+		  "GOMP_loop_ull_ordered_runtime_start",
+		  BT_FN_BOOL_BOOL_ULL_ULL_ULL_ULLPTR_ULLPTR,
+		  ATTR_NOTHROW_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT, "GOMP_loop_ull_static_next",
+		  BT_FN_BOOL_ULONGLONGPTR_ULONGLONGPTR, ATTR_NOTHROW_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ULL_DYNAMIC_NEXT, "GOMP_loop_ull_dynamic_next",
+		  BT_FN_BOOL_ULONGLONGPTR_ULONGLONGPTR, ATTR_NOTHROW_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ULL_GUIDED_NEXT, "GOMP_loop_ull_guided_next",
+		  BT_FN_BOOL_ULONGLONGPTR_ULONGLONGPTR, ATTR_NOTHROW_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ULL_AUTO_NEXT, "GOMP_loop_ull_auto_next",
+		  BT_FN_BOOL_ULONGLONGPTR_ULONGLONGPTR, ATTR_NOTHROW_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ULL_RUNTIME_NEXT, "GOMP_loop_ull_runtime_next",
+		  BT_FN_BOOL_ULONGLONGPTR_ULONGLONGPTR, ATTR_NOTHROW_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT,
+		  "GOMP_loop_ull_ordered_static_next",
+		  BT_FN_BOOL_ULONGLONGPTR_ULONGLONGPTR, ATTR_NOTHROW_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT,
+		  "GOMP_loop_ull_ordered_dynamic_next",
+		  BT_FN_BOOL_ULONGLONGPTR_ULONGLONGPTR, ATTR_NOTHROW_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT,
+		  "GOMP_loop_ull_ordered_guided_next",
+		  BT_FN_BOOL_ULONGLONGPTR_ULONGLONGPTR, ATTR_NOTHROW_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ULL_ORDERED_AUTO_NEXT,
+		  "GOMP_loop_ull_ordered_auto_next",
+		  BT_FN_BOOL_ULONGLONGPTR_ULONGLONGPTR, ATTR_NOTHROW_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT,
+		  "GOMP_loop_ull_ordered_runtime_next",
+		  BT_FN_BOOL_ULONGLONGPTR_ULONGLONGPTR, ATTR_NOTHROW_LIST)
 /* NOTE: Do not change the order of BUILT_IN_GOMP_PARALLEL_LOOP_*_START.
    They are used in index arithmetic with enum omp_clause_schedule_kind
    in omp-low.c.  */
--- gcc/gimplify.c.jj	2008-05-14 08:32:08.000000000 +0200
+++ gcc/gimplify.c	2008-05-14 12:09:04.000000000 +0200
@@ -5431,7 +5431,8 @@ gimplify_omp_for (tree *expr_p, tree *pr
 		  || TREE_CODE (t) == GIMPLE_MODIFY_STMT);
       decl = GENERIC_TREE_OPERAND (t, 0);
       gcc_assert (DECL_P (decl));
-      gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (decl)));
+      gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (decl))
+		  || POINTER_TYPE_P (TREE_TYPE (decl)));
 
       /* Make sure the iteration variable is private.  */
       if (omp_is_private (gimplify_omp_ctxp, decl))
@@ -5505,6 +5506,7 @@ gimplify_omp_for (tree *expr_p, tree *pr
 
 	      /* Fallthru.  */
 	    case MINUS_EXPR:
+	    case POINTER_PLUS_EXPR:
 	      gcc_assert (TREE_OPERAND (t, 0) == decl);
 	      TREE_OPERAND (t, 0) = var;
 	      break;
--- gcc/omp-low.c.jj	2008-05-14 08:39:30.000000000 +0200
+++ gcc/omp-low.c	2008-05-15 18:05:30.000000000 +0200
@@ -115,7 +115,7 @@ struct omp_for_data
 {
   struct omp_for_data_loop loop;
   tree chunk_size, for_stmt;
-  tree pre;
+  tree pre, iter_type;
   int collapse;
   bool have_nowait, have_ordered;
   enum omp_clause_schedule_kind sched_kind;
@@ -190,7 +190,7 @@ extract_omp_for_data (tree for_stmt, str
 		      struct omp_for_data_loop *loops)
 {
   tree t, var, *collapse_iter, *collapse_count;
-  tree count = NULL_TREE, iter_type = NULL_TREE;
+  tree count = NULL_TREE, iter_type = long_integer_type_node;
   struct omp_for_data_loop *loop;
   int i;
   struct omp_for_data_loop dummy_loop;
@@ -260,7 +260,8 @@ extract_omp_for_data (tree for_stmt, str
       gcc_assert (TREE_CODE (t) == GIMPLE_MODIFY_STMT);
       loop->v = GIMPLE_STMT_OPERAND (t, 0);
       gcc_assert (SSA_VAR_P (loop->v));
-      gcc_assert (TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE);
+      gcc_assert (TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE
+		  || TREE_CODE (TREE_TYPE (loop->v)) == POINTER_TYPE);
       var = TREE_CODE (loop->v) == SSA_NAME ? SSA_NAME_VAR (loop->v) : loop->v;
       loop->n1 = GIMPLE_STMT_OPERAND (t, 1);
 
@@ -274,13 +275,21 @@ extract_omp_for_data (tree for_stmt, str
 	case GT_EXPR:
 	  break;
 	case LE_EXPR:
-	  loop->n2 = fold_build2 (PLUS_EXPR, TREE_TYPE (loop->n2), loop->n2,
-				  build_int_cst (TREE_TYPE (loop->n2), 1));
+	  if (POINTER_TYPE_P (TREE_TYPE (loop->n2)))
+	    loop->n2 = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (loop->n2),
+				    loop->n2, size_one_node);
+	  else
+	    loop->n2 = fold_build2 (PLUS_EXPR, TREE_TYPE (loop->n2), loop->n2,
+				    build_int_cst (TREE_TYPE (loop->n2), 1));
 	  loop->cond_code = LT_EXPR;
 	  break;
 	case GE_EXPR:
-	  loop->n2 = fold_build2 (MINUS_EXPR, TREE_TYPE (loop->n2), loop->n2,
-				  build_int_cst (TREE_TYPE (loop->n2), 1));
+	  if (POINTER_TYPE_P (TREE_TYPE (loop->n2)))
+	    loop->n2 = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (loop->n2),
+				    loop->n2, size_int (-1));
+	  else
+	    loop->n2 = fold_build2 (MINUS_EXPR, TREE_TYPE (loop->n2), loop->n2,
+				    build_int_cst (TREE_TYPE (loop->n2), 1));
 	  loop->cond_code = GT_EXPR;
 	  break;
 	default:
@@ -295,6 +304,7 @@ extract_omp_for_data (tree for_stmt, str
       switch (TREE_CODE (t))
 	{
 	case PLUS_EXPR:
+	case POINTER_PLUS_EXPR:
 	  loop->step = TREE_OPERAND (t, 1);
 	  break;
 	case MINUS_EXPR:
@@ -306,43 +316,109 @@ extract_omp_for_data (tree for_stmt, str
 	  gcc_unreachable ();
 	}
 
-      if (collapse_count && *collapse_count == NULL)
+      if (iter_type != long_long_unsigned_type_node)
 	{
-	  tree type;
+	  if (POINTER_TYPE_P (TREE_TYPE (loop->v)))
+	    iter_type = long_long_unsigned_type_node;
+	  else if (TYPE_UNSIGNED (TREE_TYPE (loop->v))
+		   && TYPE_PRECISION (TREE_TYPE (loop->v))
+		      >= TYPE_PRECISION (iter_type))
+	    {
+	      tree n;
+
+	      if (loop->cond_code == LT_EXPR)
+		n = fold_build2 (PLUS_EXPR, TREE_TYPE (loop->v),
+				 loop->n2, loop->step);
+	      else
+		n = loop->n1;
+	      if (TREE_CODE (n) != INTEGER_CST
+		  || tree_int_cst_lt (TYPE_MAX_VALUE (iter_type), n))
+		iter_type = long_long_unsigned_type_node;
+	    }
+	  else if (TYPE_PRECISION (TREE_TYPE (loop->v))
+		   > TYPE_PRECISION (iter_type))
+	    {
+	      tree n1, n2;
+
+	      if (loop->cond_code == LT_EXPR)
+		{
+		  n1 = loop->n1;
+		  n2 = fold_build2 (PLUS_EXPR, TREE_TYPE (loop->v),
+				    loop->n2, loop->step);
+		}
+	      else
+		{
+		  n1 = fold_build2 (MINUS_EXPR, TREE_TYPE (loop->v),
+				    loop->n2, loop->step);
+		  n2 = loop->n1;
+		}
+	      if (TREE_CODE (n1) != INTEGER_CST
+		  || TREE_CODE (n2) != INTEGER_CST
+		  || !tree_int_cst_lt (TYPE_MIN_VALUE (iter_type), n1)
+		  || !tree_int_cst_lt (n2, TYPE_MAX_VALUE (iter_type)))
+		iter_type = long_long_unsigned_type_node;
+	    }
+	}
 
-	  /* FIXME: wait for final OpenMP 3.0 standard to find out
-	     which type should be used for the collapsed count
-	     computation.  */
-	  if (i == 0)
-	    iter_type = TREE_TYPE (loop->v);
+      if (collapse_count && *collapse_count == NULL)
+	{
 	  if ((i == 0 || count != NULL_TREE)
-	      && TREE_CODE (loop->n1) == INTEGER_CST
-	      && TREE_CODE (loop->n2) == INTEGER_CST
+	      && TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE
+	      && TREE_CONSTANT (loop->n1)
+	      && TREE_CONSTANT (loop->n2)
 	      && TREE_CODE (loop->step) == INTEGER_CST)
 	    {
-	      type = TREE_TYPE (loop->v);
-	      t = build_int_cst (type, (loop->cond_code == LT_EXPR ? -1 : 1));
-	      t = fold_build2 (PLUS_EXPR, type, loop->step, t);
-	      t = fold_build2 (PLUS_EXPR, type, t, loop->n2);
-	      t = fold_build2 (MINUS_EXPR, type, t, loop->n1);
-	      t = fold_build2 (TRUNC_DIV_EXPR, type, t, loop->step);
-	      t = fold_convert (iter_type, t);
+	      tree itype = TREE_TYPE (loop->v);
+
+	      if (POINTER_TYPE_P (itype))
+		itype
+		  = lang_hooks.types.type_for_size (TYPE_PRECISION (itype), 0);
+	      t = build_int_cst (itype, (loop->cond_code == LT_EXPR ? -1 : 1));
+	      t = fold_build2 (PLUS_EXPR, itype,
+			       fold_convert (itype, loop->step), t);
+	      t = fold_build2 (PLUS_EXPR, itype, t,
+			       fold_convert (itype, loop->n2));
+	      t = fold_build2 (MINUS_EXPR, itype, t,
+			       fold_convert (itype, loop->n1));
+	      if (TYPE_UNSIGNED (itype) && loop->cond_code == GT_EXPR)
+		t = fold_build2 (TRUNC_DIV_EXPR, itype,
+				 fold_build1 (NEGATE_EXPR, itype, t),
+				 fold_build1 (NEGATE_EXPR, itype,
+					      fold_convert (itype,
+							    loop->step)));
+	      else
+		t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
+				 fold_convert (itype, loop->step));
+	      t = fold_convert (long_long_unsigned_type_node, t);
 	      if (count != NULL_TREE)
-		count = fold_build2 (MULT_EXPR, iter_type, count, t);
+		count = fold_build2 (MULT_EXPR, long_long_unsigned_type_node,
+				     count, t);
 	      else
 		count = t;
+	      if (TREE_CODE (count) != INTEGER_CST)
+		count = NULL_TREE;
 	    }
 	  else
 	    count = NULL_TREE;
 	}
     }
 
+  if (count)
+    {
+      if (!tree_int_cst_lt (count, TYPE_MAX_VALUE (long_integer_type_node)))
+	iter_type = long_long_unsigned_type_node;
+      else
+	iter_type = long_integer_type_node;
+    }
+  else if (collapse_iter && *collapse_iter != NULL)
+    iter_type = TREE_TYPE (*collapse_iter);
+  fd->iter_type = iter_type;
   if (collapse_iter && *collapse_iter == NULL)
     *collapse_iter = create_tmp_var (iter_type, ".iter");
   if (collapse_count && *collapse_count == NULL)
     {
       if (count)
-	*collapse_count = count;
+	*collapse_count = fold_convert (iter_type, count);
       else
 	*collapse_count = create_tmp_var (iter_type, ".count");
     }
@@ -418,6 +494,8 @@ workshare_safe_to_combine_p (basic_block
 
   if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
     return false;
+  if (fd.iter_type != long_integer_type_node)
+    return false;
 
   /* FIXME.  We give up too easily here.  If any of these arguments
      are not constants, they will likely involve variables that have
@@ -3329,7 +3407,7 @@ expand_omp_for_generic (struct omp_regio
 			enum built_in_function next_fn)
 {
   tree type, istart0, iend0, iend, phi;
-  tree t, vmain, vback;
+  tree t, vmain, vback, bias = NULL_TREE;
   basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
   basic_block l2_bb = NULL, l3_bb = NULL;
   block_stmt_iterator si;
@@ -3340,11 +3418,12 @@ expand_omp_for_generic (struct omp_regio
   int i;
 
   gcc_assert (!broken_loop || !in_combined_parallel);
+  gcc_assert (fd->iter_type == long_integer_type_node
+	      || !in_combined_parallel);
 
   type = TREE_TYPE (fd->loop.v);
-
-  istart0 = create_tmp_var (long_integer_type_node, ".istart0");
-  iend0 = create_tmp_var (long_integer_type_node, ".iend0");
+  istart0 = create_tmp_var (fd->iter_type, ".istart0");
+  iend0 = create_tmp_var (fd->iter_type, ".iend0");
   TREE_ADDRESSABLE (istart0) = 1;
   TREE_ADDRESSABLE (iend0) = 1;
   if (gimple_in_ssa_p (cfun))
@@ -3353,6 +3432,29 @@ expand_omp_for_generic (struct omp_regio
       add_referenced_var (iend0);
     }
 
+  /* See if we need to bias by LLONG_MIN.  */
+  if (fd->iter_type == long_long_unsigned_type_node
+      && TREE_CODE (type) == INTEGER_TYPE
+      && !TYPE_UNSIGNED (type))
+    {
+      tree n1, n2;
+
+      if (fd->loop.cond_code == LT_EXPR)
+	{
+	  n1 = fd->loop.n1;
+	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
+	}
+      else
+	{
+	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
+	  n2 = fd->loop.n1;
+	}
+      if (TREE_CODE (n1) != INTEGER_CST
+	  || TREE_CODE (n2) != INTEGER_CST
+	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
+	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
+    }
+
   entry_bb = region->entry;
   cont_bb = region->cont;
   collapse_bb = NULL;
@@ -3383,12 +3485,26 @@ expand_omp_for_generic (struct omp_regio
       for (i = 0; i < fd->collapse; i++)
 	{
 	  tree itype = TREE_TYPE (fd->loops[i].v);
+
+	  if (POINTER_TYPE_P (itype))
+	    itype = lang_hooks.types.type_for_size (TYPE_PRECISION (itype), 0);
 	  t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
 				     ? -1 : 1));
-	  t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].step, t);
-	  t = fold_build2 (PLUS_EXPR, itype, t, fd->loops[i].n2);
-	  t = fold_build2 (MINUS_EXPR, itype, t, fd->loops[i].n1);
-	  t = fold_build2 (TRUNC_DIV_EXPR, itype, t, fd->loops[i].step);
+	  t = fold_build2 (PLUS_EXPR, itype,
+			   fold_convert (itype, fd->loops[i].step), t);
+	  t = fold_build2 (PLUS_EXPR, itype, t,
+			   fold_convert (itype, fd->loops[i].n2));
+	  t = fold_build2 (MINUS_EXPR, itype, t,
+			   fold_convert (itype, fd->loops[i].n1));
+	  if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
+	    t = fold_build2 (TRUNC_DIV_EXPR, itype,
+			     fold_build1 (NEGATE_EXPR, itype, t),
+			     fold_build1 (NEGATE_EXPR, itype,
+					  fold_convert (itype,
+							fd->loops[i].step)));
+	  else
+	    t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
+			     fold_convert (itype, fd->loops[i].step));
 	  t = fold_convert (type, t);
 	  if (TREE_CODE (t) == INTEGER_CST)
 	    counts[i] = t;
@@ -3428,18 +3544,48 @@ expand_omp_for_generic (struct omp_regio
 	 GOMP_loop_foo_start in ENTRY_BB.  */
       t4 = build_fold_addr_expr (iend0);
       t3 = build_fold_addr_expr (istart0);
-      t2 = fold_convert (long_integer_type_node, fd->loop.step);
-      t1 = fold_convert (long_integer_type_node, fd->loop.n2);
-      t0 = fold_convert (long_integer_type_node, fd->loop.n1);
-      if (fd->chunk_size)
-	{
-	  t = fold_convert (long_integer_type_node, fd->chunk_size);
-	  t = build_call_expr (built_in_decls[start_fn], 6,
-			       t0, t1, t2, t, t3, t4);
+      t2 = fold_convert (fd->iter_type, fd->loop.step);
+      t1 = fold_convert (fd->iter_type, fd->loop.n2);
+      t0 = fold_convert (fd->iter_type, fd->loop.n1);
+      if (bias)
+	{
+	  t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
+	  t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
+	}
+      if (fd->iter_type == long_integer_type_node)
+	{
+	  if (fd->chunk_size)
+	    {
+	      t = fold_convert (fd->iter_type, fd->chunk_size);
+	      t = build_call_expr (built_in_decls[start_fn], 6,
+				   t0, t1, t2, t, t3, t4);
+	    }
+	  else
+	    t = build_call_expr (built_in_decls[start_fn], 5,
+				 t0, t1, t2, t3, t4);
 	}
       else
-	t = build_call_expr (built_in_decls[start_fn], 5,
-			     t0, t1, t2, t3, t4);
+	{
+	  tree t5;
+	  tree c_bool_type;
+
+	  /* The GOMP_loop_ull_*start functions have additional boolean
+	     argument, true for < loops and false for > loops.
+	     In Fortran, the C bool type can be different from
+	     boolean_type_node.  */
+	  c_bool_type = TREE_TYPE (TREE_TYPE (built_in_decls[start_fn]));
+	  t5 = build_int_cst (c_bool_type,
+			      fd->loop.cond_code == LT_EXPR ? 1 : 0);
+	  if (fd->chunk_size)
+	    {
+	      t = fold_convert (fd->iter_type, fd->chunk_size);
+	      t = build_call_expr (built_in_decls[start_fn], 7,
+				   t5, t0, t1, t2, t, t3, t4);
+	    }
+	  else
+	    t = build_call_expr (built_in_decls[start_fn], 6,
+				 t5, t0, t1, t2, t3, t4);
+	}
     }
   if (TREE_TYPE (t) != boolean_type_node)
     t = fold_build2 (NE_EXPR, boolean_type_node,
@@ -3454,7 +3600,11 @@ expand_omp_for_generic (struct omp_regio
 
   /* Iteration setup for sequential loop goes in L0_BB.  */
   si = bsi_start (l0_bb);
-  t = fold_convert (type, istart0);
+  if (bias)
+    t = fold_convert (type, fold_build2 (MINUS_EXPR, fd->iter_type,
+					 istart0, bias));
+  else
+    t = fold_convert (type, istart0);
   t = force_gimple_operand_bsi (&si, t, false, NULL_TREE,
 				false, BSI_CONTINUE_LINKING);
   t = build_gimple_modify_stmt (fd->loop.v, t);
@@ -3462,7 +3612,11 @@ expand_omp_for_generic (struct omp_regio
   if (gimple_in_ssa_p (cfun))
     SSA_NAME_DEF_STMT (fd->loop.v) = t;
 
-  t = fold_convert (type, iend0);
+  if (bias)
+    t = fold_convert (type, fold_build2 (MINUS_EXPR, fd->iter_type,
+					 iend0, bias));
+  else
+    t = fold_convert (type, iend0);
   iend = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
 				   false, BSI_CONTINUE_LINKING);
   if (fd->collapse > 1)
@@ -3473,11 +3627,18 @@ expand_omp_for_generic (struct omp_regio
       bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
       for (i = fd->collapse - 1; i >= 0; i--)
 	{
-	  tree itype = TREE_TYPE (fd->loops[i].v);
+	  tree vtype = TREE_TYPE (fd->loops[i].v), itype;
+	  itype = vtype;
+	  if (POINTER_TYPE_P (vtype))
+	    itype = lang_hooks.types.type_for_size (TYPE_PRECISION (vtype), 0);
 	  t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
 	  t = fold_convert (itype, t);
 	  t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].step);
-	  t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
+	  if (POINTER_TYPE_P (vtype))
+	    t = fold_build2 (POINTER_PLUS_EXPR, vtype,
+			     fd->loops[i].n1, fold_convert (sizetype, t));
+	  else
+	    t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
 	  t = build_gimple_modify_stmt (fd->loops[i].v, t);
 	  force_gimple_operand_bsi (&si, t, true, NULL_TREE,
 				    false, BSI_CONTINUE_LINKING);
@@ -3501,7 +3662,11 @@ expand_omp_for_generic (struct omp_regio
       vmain = TREE_OPERAND (t, 1);
       vback = TREE_OPERAND (t, 0);
 
-      t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
+      if (POINTER_TYPE_P (type))
+	t = fold_build2 (POINTER_PLUS_EXPR, type, vmain,
+			 fold_convert (sizetype, fd->loop.step));
+      else
+	t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
       t = force_gimple_operand_bsi (&si, t, false, NULL_TREE,
 				    true, BSI_SAME_STMT);
       t = build_gimple_modify_stmt (vback, t);
@@ -3523,7 +3688,7 @@ expand_omp_for_generic (struct omp_regio
 	  last_bb = cont_bb;
 	  for (i = fd->collapse - 1; i >= 0; i--)
 	    {
-	      tree itype = TREE_TYPE (fd->loops[i].v);
+	      tree vtype = TREE_TYPE (fd->loops[i].v);
 
 	      bb = create_empty_bb (last_bb);
 	      si = bsi_start (bb);
@@ -3543,8 +3708,13 @@ expand_omp_for_generic (struct omp_regio
 
 	      set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
 
-	      t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].v,
-			       fd->loops[i].step);
+	      if (POINTER_TYPE_P (vtype))
+		t = fold_build2 (POINTER_PLUS_EXPR, vtype,
+				 fd->loops[i].v,
+				 fold_convert (sizetype, fd->loops[i].step));
+	      else
+		t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v,
+				 fd->loops[i].step);
 	      t = build_gimple_modify_stmt (fd->loops[i].v, t);
 	      force_gimple_operand_bsi (&si, t, true, NULL_TREE,
 					false, BSI_CONTINUE_LINKING);
@@ -3646,7 +3816,10 @@ expand_omp_for_generic (struct omp_regio
 	  adj = STEP - 1;
 	else
 	  adj = STEP + 1;
-	n = (adj + N2 - N1) / STEP;
+	if ((__typeof (V)) -1 > 0 && cond is >)
+	  n = -(adj + N2 - N1) / -STEP;
+	else
+	  n = (adj + N2 - N1) / STEP;
 	q = n / nthreads;
 	q += (q * nthreads != n);
 	s0 = q * threadid;
@@ -3667,12 +3840,14 @@ expand_omp_for_static_nochunk (struct om
 			       struct omp_for_data *fd)
 {
   tree n, q, s0, e0, e, t, nthreads, threadid;
-  tree type, vmain, vback;
+  tree type, itype, vmain, vback;
   basic_block entry_bb, exit_bb, seq_start_bb, body_bb, cont_bb;
   basic_block fin_bb;
   block_stmt_iterator si;
 
-  type = TREE_TYPE (fd->loop.v);
+  itype = type = TREE_TYPE (fd->loop.v);
+  if (POINTER_TYPE_P (type))
+    itype = lang_hooks.types.type_for_size (TYPE_PRECISION (type), 0);
 
   entry_bb = region->entry;
   cont_bb = region->cont;
@@ -3690,12 +3865,12 @@ expand_omp_for_static_nochunk (struct om
   gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_FOR);
 
   t = build_call_expr (built_in_decls[BUILT_IN_OMP_GET_NUM_THREADS], 0);
-  t = fold_convert (type, t);
+  t = fold_convert (itype, t);
   nthreads = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
 				       true, BSI_SAME_STMT);
   
   t = build_call_expr (built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM], 0);
-  t = fold_convert (type, t);
+  t = fold_convert (itype, t);
   threadid = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
 				       true, BSI_SAME_STMT);
 
@@ -3703,33 +3878,38 @@ expand_omp_for_static_nochunk (struct om
     = force_gimple_operand_bsi (&si, fold_convert (type, fd->loop.n1),
 				true, NULL_TREE, true, BSI_SAME_STMT);
   fd->loop.n2
-    = force_gimple_operand_bsi (&si, fold_convert (type, fd->loop.n2),
+    = force_gimple_operand_bsi (&si, fold_convert (itype, fd->loop.n2),
 				true, NULL_TREE, true, BSI_SAME_STMT);
   fd->loop.step
-    = force_gimple_operand_bsi (&si, fold_convert (type, fd->loop.step),
+    = force_gimple_operand_bsi (&si, fold_convert (itype, fd->loop.step),
 				true, NULL_TREE, true, BSI_SAME_STMT);
 
-  t = build_int_cst (type, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
-  t = fold_build2 (PLUS_EXPR, type, fd->loop.step, t);
-  t = fold_build2 (PLUS_EXPR, type, t, fd->loop.n2);
-  t = fold_build2 (MINUS_EXPR, type, t, fd->loop.n1);
-  t = fold_build2 (TRUNC_DIV_EXPR, type, t, fd->loop.step);
-  t = fold_convert (type, t);
+  t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
+  t = fold_build2 (PLUS_EXPR, itype, fd->loop.step, t);
+  t = fold_build2 (PLUS_EXPR, itype, t, fd->loop.n2);
+  t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, fd->loop.n1));
+  if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
+    t = fold_build2 (TRUNC_DIV_EXPR, itype,
+		     fold_build1 (NEGATE_EXPR, itype, t),
+		     fold_build1 (NEGATE_EXPR, itype, fd->loop.step));
+  else
+    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, fd->loop.step);
+  t = fold_convert (itype, t);
   n = force_gimple_operand_bsi (&si, t, true, NULL_TREE, true, BSI_SAME_STMT);
 
-  t = fold_build2 (TRUNC_DIV_EXPR, type, n, nthreads);
+  t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
   q = force_gimple_operand_bsi (&si, t, true, NULL_TREE, true, BSI_SAME_STMT);
 
-  t = fold_build2 (MULT_EXPR, type, q, nthreads);
-  t = fold_build2 (NE_EXPR, type, t, n);
-  t = fold_build2 (PLUS_EXPR, type, q, t);
+  t = fold_build2 (MULT_EXPR, itype, q, nthreads);
+  t = fold_build2 (NE_EXPR, itype, t, n);
+  t = fold_build2 (PLUS_EXPR, itype, q, t);
   q = force_gimple_operand_bsi (&si, t, true, NULL_TREE, true, BSI_SAME_STMT);
 
-  t = build2 (MULT_EXPR, type, q, threadid);
+  t = build2 (MULT_EXPR, itype, q, threadid);
   s0 = force_gimple_operand_bsi (&si, t, true, NULL_TREE, true, BSI_SAME_STMT);
 
-  t = fold_build2 (PLUS_EXPR, type, s0, q);
-  t = fold_build2 (MIN_EXPR, type, t, n);
+  t = fold_build2 (PLUS_EXPR, itype, s0, q);
+  t = fold_build2 (MIN_EXPR, itype, t, n);
   e0 = force_gimple_operand_bsi (&si, t, true, NULL_TREE, true, BSI_SAME_STMT);
 
   t = build2 (GE_EXPR, boolean_type_node, s0, e0);
@@ -3742,9 +3922,13 @@ expand_omp_for_static_nochunk (struct om
   /* Setup code for sequential iteration goes in SEQ_START_BB.  */
   si = bsi_start (seq_start_bb);
 
-  t = fold_convert (type, s0);
-  t = fold_build2 (MULT_EXPR, type, t, fd->loop.step);
-  t = fold_build2 (PLUS_EXPR, type, t, fd->loop.n1);
+  t = fold_convert (itype, s0);
+  t = fold_build2 (MULT_EXPR, itype, t, fd->loop.step);
+  if (POINTER_TYPE_P (type))
+    t = fold_build2 (POINTER_PLUS_EXPR, type, fd->loop.n1,
+		     fold_convert (sizetype, t));
+  else
+    t = fold_build2 (PLUS_EXPR, type, t, fd->loop.n1);
   t = force_gimple_operand_bsi (&si, t, false, NULL_TREE,
 				false, BSI_CONTINUE_LINKING);
   t = build_gimple_modify_stmt (fd->loop.v, t);
@@ -3752,9 +3936,13 @@ expand_omp_for_static_nochunk (struct om
   if (gimple_in_ssa_p (cfun))
     SSA_NAME_DEF_STMT (fd->loop.v) = t;
 
-  t = fold_convert (type, e0);
-  t = fold_build2 (MULT_EXPR, type, t, fd->loop.step);
-  t = fold_build2 (PLUS_EXPR, type, t, fd->loop.n1);
+  t = fold_convert (itype, e0);
+  t = fold_build2 (MULT_EXPR, itype, t, fd->loop.step);
+  if (POINTER_TYPE_P (type))
+    t = fold_build2 (POINTER_PLUS_EXPR, type, fd->loop.n1,
+		     fold_convert (sizetype, t));
+  else
+    t = fold_build2 (PLUS_EXPR, type, t, fd->loop.n1);
   e = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
 				false, BSI_CONTINUE_LINKING);
 
@@ -3765,7 +3953,11 @@ expand_omp_for_static_nochunk (struct om
   vmain = TREE_OPERAND (t, 1);
   vback = TREE_OPERAND (t, 0);
 
-  t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
+  if (POINTER_TYPE_P (type))
+    t = fold_build2 (POINTER_PLUS_EXPR, type, vmain,
+		     fold_convert (sizetype, fd->loop.step));
+  else
+    t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
   t = force_gimple_operand_bsi (&si, t, false, NULL_TREE,
 				true, BSI_SAME_STMT);
   t = build_gimple_modify_stmt (vback, t);
@@ -3814,7 +4006,10 @@ expand_omp_for_static_nochunk (struct om
 	  adj = STEP - 1;
 	else
 	  adj = STEP + 1;
-	n = (adj + N2 - N1) / STEP;
+	if ((__typeof (V)) -1 > 0 && cond is >)
+	  n = -(adj + N2 - N1) / -STEP;
+	else
+	  n = (adj + N2 - N1) / STEP;
 	trip = 0;
 	V = threadid * CHUNK * STEP + N1;  -- this extra definition of V is
 					      here so that V is defined
@@ -3842,13 +4037,15 @@ expand_omp_for_static_chunk (struct omp_
 {
   tree n, s0, e0, e, t, phi, nphi, args;
   tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
-  tree type, cont, v_main, v_back, v_extra;
+  tree type, itype, cont, v_main, v_back, v_extra;
   basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
   basic_block trip_update_bb, cont_bb, fin_bb;
   block_stmt_iterator si;
   edge se, re, ene;
 
-  type = TREE_TYPE (fd->loop.v);
+  itype = type = TREE_TYPE (fd->loop.v);
+  if (POINTER_TYPE_P (type))
+    itype = lang_hooks.types.type_for_size (TYPE_PRECISION (type), 0);
 
   entry_bb = region->entry;
   se = split_block (entry_bb, last_stmt (entry_bb));
@@ -3871,12 +4068,12 @@ expand_omp_for_static_chunk (struct omp_
   gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_FOR);
 
   t = build_call_expr (built_in_decls[BUILT_IN_OMP_GET_NUM_THREADS], 0);
-  t = fold_convert (type, t);
+  t = fold_convert (itype, t);
   nthreads = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
 				       true, BSI_SAME_STMT);
   
   t = build_call_expr (built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM], 0);
-  t = fold_convert (type, t);
+  t = fold_convert (itype, t);
   threadid = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
 				       true, BSI_SAME_STMT);
 
@@ -3884,25 +4081,30 @@ expand_omp_for_static_chunk (struct omp_
     = force_gimple_operand_bsi (&si, fold_convert (type, fd->loop.n1),
 				true, NULL_TREE, true, BSI_SAME_STMT);
   fd->loop.n2
-    = force_gimple_operand_bsi (&si, fold_convert (type, fd->loop.n2),
+    = force_gimple_operand_bsi (&si, fold_convert (itype, fd->loop.n2),
 				true, NULL_TREE, true, BSI_SAME_STMT);
   fd->loop.step
-    = force_gimple_operand_bsi (&si, fold_convert (type, fd->loop.step),
+    = force_gimple_operand_bsi (&si, fold_convert (itype, fd->loop.step),
 				true, NULL_TREE, true, BSI_SAME_STMT);
   fd->chunk_size
-    = force_gimple_operand_bsi (&si, fold_convert (type, fd->chunk_size),
+    = force_gimple_operand_bsi (&si, fold_convert (itype, fd->chunk_size),
 				true, NULL_TREE, true, BSI_SAME_STMT);
 
-  t = build_int_cst (type, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
-  t = fold_build2 (PLUS_EXPR, type, fd->loop.step, t);
-  t = fold_build2 (PLUS_EXPR, type, t, fd->loop.n2);
-  t = fold_build2 (MINUS_EXPR, type, t, fd->loop.n1);
-  t = fold_build2 (TRUNC_DIV_EXPR, type, t, fd->loop.step);
-  t = fold_convert (type, t);
+  t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
+  t = fold_build2 (PLUS_EXPR, itype, fd->loop.step, t);
+  t = fold_build2 (PLUS_EXPR, itype, t, fd->loop.n2);
+  t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, fd->loop.n1));
+  if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
+    t = fold_build2 (TRUNC_DIV_EXPR, itype,
+		     fold_build1 (NEGATE_EXPR, itype, t),
+		     fold_build1 (NEGATE_EXPR, itype, fd->loop.step));
+  else
+    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, fd->loop.step);
+  t = fold_convert (itype, t);
   n = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
 				true, BSI_SAME_STMT);
 
-  trip_var = create_tmp_var (type, ".trip");
+  trip_var = create_tmp_var (itype, ".trip");
   if (gimple_in_ssa_p (cfun))
     {
       add_referenced_var (trip_var);
@@ -3917,14 +4119,18 @@ expand_omp_for_static_chunk (struct omp_
       trip_back = trip_var;
     }
 
-  t = build_gimple_modify_stmt (trip_init, build_int_cst (type, 0));
+  t = build_gimple_modify_stmt (trip_init, build_int_cst (itype, 0));
   bsi_insert_before (&si, t, BSI_SAME_STMT);
   if (gimple_in_ssa_p (cfun))
     SSA_NAME_DEF_STMT (trip_init) = t;
 
-  t = fold_build2 (MULT_EXPR, type, threadid, fd->chunk_size);
-  t = fold_build2 (MULT_EXPR, type, t, fd->loop.step);
-  t = fold_build2 (PLUS_EXPR, type, t, fd->loop.n1);
+  t = fold_build2 (MULT_EXPR, itype, threadid, fd->chunk_size);
+  t = fold_build2 (MULT_EXPR, itype, t, fd->loop.step);
+  if (POINTER_TYPE_P (type))
+    t = fold_build2 (POINTER_PLUS_EXPR, type, fd->loop.n1,
+		     fold_convert (sizetype, t));
+  else
+    t = fold_build2 (PLUS_EXPR, type, t, fd->loop.n1);
   v_extra = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
 				      true, BSI_SAME_STMT);
 
@@ -3934,14 +4140,14 @@ expand_omp_for_static_chunk (struct omp_
   /* Iteration space partitioning goes in ITER_PART_BB.  */
   si = bsi_last (iter_part_bb);
 
-  t = fold_build2 (MULT_EXPR, type, trip_main, nthreads);
-  t = fold_build2 (PLUS_EXPR, type, t, threadid);
-  t = fold_build2 (MULT_EXPR, type, t, fd->chunk_size);
+  t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
+  t = fold_build2 (PLUS_EXPR, itype, t, threadid);
+  t = fold_build2 (MULT_EXPR, itype, t, fd->chunk_size);
   s0 = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
 				 false, BSI_CONTINUE_LINKING);
 
-  t = fold_build2 (PLUS_EXPR, type, s0, fd->chunk_size);
-  t = fold_build2 (MIN_EXPR, type, t, n);
+  t = fold_build2 (PLUS_EXPR, itype, s0, fd->chunk_size);
+  t = fold_build2 (MIN_EXPR, itype, t, n);
   e0 = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
 				 false, BSI_CONTINUE_LINKING);
 
@@ -3952,9 +4158,13 @@ expand_omp_for_static_chunk (struct omp_
   /* Setup code for sequential iteration goes in SEQ_START_BB.  */
   si = bsi_start (seq_start_bb);
 
-  t = fold_convert (type, s0);
-  t = fold_build2 (MULT_EXPR, type, t, fd->loop.step);
-  t = fold_build2 (PLUS_EXPR, type, t, fd->loop.n1);
+  t = fold_convert (itype, s0);
+  t = fold_build2 (MULT_EXPR, itype, t, fd->loop.step);
+  if (POINTER_TYPE_P (type))
+    t = fold_build2 (POINTER_PLUS_EXPR, type, fd->loop.n1,
+		     fold_convert (sizetype, t));
+  else
+    t = fold_build2 (PLUS_EXPR, type, t, fd->loop.n1);
   t = force_gimple_operand_bsi (&si, t, false, NULL_TREE,
 				false, BSI_CONTINUE_LINKING);
   t = build_gimple_modify_stmt (fd->loop.v, t);
@@ -3962,9 +4172,13 @@ expand_omp_for_static_chunk (struct omp_
   if (gimple_in_ssa_p (cfun))
     SSA_NAME_DEF_STMT (fd->loop.v) = t;
 
-  t = fold_convert (type, e0);
-  t = fold_build2 (MULT_EXPR, type, t, fd->loop.step);
-  t = fold_build2 (PLUS_EXPR, type, t, fd->loop.n1);
+  t = fold_convert (itype, e0);
+  t = fold_build2 (MULT_EXPR, itype, t, fd->loop.step);
+  if (POINTER_TYPE_P (type))
+    t = fold_build2 (POINTER_PLUS_EXPR, type, fd->loop.n1,
+		     fold_convert (sizetype, t));
+  else
+    t = fold_build2 (PLUS_EXPR, type, t, fd->loop.n1);
   e = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
 				false, BSI_CONTINUE_LINKING);
 
@@ -3976,7 +4190,11 @@ expand_omp_for_static_chunk (struct omp_
   v_main = TREE_OPERAND (cont, 1);
   v_back = TREE_OPERAND (cont, 0);
 
-  t = build2 (PLUS_EXPR, type, v_main, fd->loop.step);
+  if (POINTER_TYPE_P (type))
+    t = fold_build2 (POINTER_PLUS_EXPR, type, v_main,
+		     fold_convert (sizetype, fd->loop.step));
+  else
+    t = build2 (PLUS_EXPR, type, v_main, fd->loop.step);
   t = build_gimple_modify_stmt (v_back, t);
   bsi_insert_before (&si, t, BSI_SAME_STMT);
   if (gimple_in_ssa_p (cfun))
@@ -3992,8 +4210,8 @@ expand_omp_for_static_chunk (struct omp_
   /* Trip update code goes into TRIP_UPDATE_BB.  */
   si = bsi_start (trip_update_bb);
 
-  t = build_int_cst (type, 1);
-  t = build2 (PLUS_EXPR, type, trip_main, t);
+  t = build_int_cst (itype, 1);
+  t = build2 (PLUS_EXPR, itype, trip_main, t);
   t = build_gimple_modify_stmt (trip_back, t);
   bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
   if (gimple_in_ssa_p (cfun))
@@ -4107,6 +4325,13 @@ expand_omp_for (struct omp_region *regio
       int fn_index = fd.sched_kind + fd.have_ordered * 5;
       int start_ix = BUILT_IN_GOMP_LOOP_STATIC_START + fn_index;
       int next_ix = BUILT_IN_GOMP_LOOP_STATIC_NEXT + fn_index;
+      if (fd.iter_type == long_long_unsigned_type_node)
+	{
+	  start_ix += BUILT_IN_GOMP_LOOP_ULL_STATIC_START
+		      - BUILT_IN_GOMP_LOOP_STATIC_START;
+	  next_ix += BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
+		     - BUILT_IN_GOMP_LOOP_STATIC_NEXT;
+	}
       expand_omp_for_generic (region, &fd, start_ix, next_ix);
     }
 
--- gcc/c-omp.c.jj	2008-05-14 08:32:08.000000000 +0200
+++ gcc/c-omp.c	2008-05-14 17:44:08.000000000 +0200
@@ -239,13 +239,12 @@ c_finish_omp_for (location_t locus, tree
 	elocus = EXPR_LOCATION (init);
 
       /* Validate the iteration variable.  */
-      if (!INTEGRAL_TYPE_P (TREE_TYPE (decl)))
+      if (!INTEGRAL_TYPE_P (TREE_TYPE (decl))
+	  && TREE_CODE (TREE_TYPE (decl)) != POINTER_TYPE)
 	{
 	  error ("%Hinvalid type for iteration variable %qE", &elocus, decl);
 	  fail = true;
 	}
-      if (TYPE_UNSIGNED (TREE_TYPE (decl)))
-	warning (0, "%Hiteration variable %qE is unsigned", &elocus, decl);
 
       /* In the case of "for (int i = 0...)", init will be a decl.  It should
 	 have a DECL_INITIAL that we can turn into an assignment.  */
@@ -355,7 +354,20 @@ c_finish_omp_for (location_t locus, tree
 	    case PREINCREMENT_EXPR:
 	    case POSTDECREMENT_EXPR:
 	    case PREDECREMENT_EXPR:
-	      incr_ok = (TREE_OPERAND (incr, 0) == decl);
+	      if (TREE_OPERAND (incr, 0) != decl)
+		break;
+
+	      incr_ok = true;
+	      if (POINTER_TYPE_P (TREE_TYPE (decl)))
+		{
+		  tree t = fold_convert (sizetype, TREE_OPERAND (incr, 1));
+
+		  if (TREE_CODE (incr) == POSTDECREMENT_EXPR
+		      || TREE_CODE (incr) == PREDECREMENT_EXPR)
+		    t = fold_build1 (NEGATE_EXPR, sizetype, t);
+		  t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (decl), decl, t);
+		  incr = build2 (MODIFY_EXPR, void_type_node, decl, t);
+		}
 	      break;
 
 	    case MODIFY_EXPR:
@@ -367,7 +379,9 @@ c_finish_omp_for (location_t locus, tree
 		  && (TREE_OPERAND (TREE_OPERAND (incr, 1), 0) == decl
 		      || TREE_OPERAND (TREE_OPERAND (incr, 1), 1) == decl))
 		incr_ok = true;
-	      else if (TREE_CODE (TREE_OPERAND (incr, 1)) == MINUS_EXPR
+	      else if ((TREE_CODE (TREE_OPERAND (incr, 1)) == MINUS_EXPR
+			|| (TREE_CODE (TREE_OPERAND (incr, 1))
+			    == POINTER_PLUS_EXPR))
 		       && TREE_OPERAND (TREE_OPERAND (incr, 1), 0) == decl)
 		incr_ok = true;
 	      else
--- gcc/cp/semantics.c.jj	2008-05-14 08:32:08.000000000 +0200
+++ gcc/cp/semantics.c	2008-05-14 12:09:04.000000000 +0200
@@ -4211,7 +4211,8 @@ finish_omp_for (location_t locus, tree d
 	  continue;
 	}
 
-      if (!INTEGRAL_TYPE_P (TREE_TYPE (decl)))
+      if (!INTEGRAL_TYPE_P (TREE_TYPE (decl))
+	  && TREE_CODE (TREE_TYPE (decl)) != POINTER_TYPE)
 	{
 	  error ("%Hinvalid type for iteration variable %qE", &elocus, decl);
 	  return NULL;
--- gcc/fortran/types.def.jj	2008-05-14 08:39:30.000000000 +0200
+++ gcc/fortran/types.def	2008-05-14 16:47:40.000000000 +0200
@@ -55,6 +55,7 @@ DEF_PRIMITIVE_TYPE (BT_BOOL,
 DEF_PRIMITIVE_TYPE (BT_INT, integer_type_node)
 DEF_PRIMITIVE_TYPE (BT_UINT, unsigned_type_node)
 DEF_PRIMITIVE_TYPE (BT_LONG, long_integer_type_node)
+DEF_PRIMITIVE_TYPE (BT_ULONGLONG, long_long_unsigned_type_node)
 DEF_PRIMITIVE_TYPE (BT_WORD, (*lang_hooks.types.type_for_mode) (word_mode, 1))
 
 DEF_PRIMITIVE_TYPE (BT_I1, builtin_type_for_size (BITS_PER_UNIT*1, 1))
@@ -71,6 +72,7 @@ DEF_PRIMITIVE_TYPE (BT_VOLATILE_PTR,
                                             TYPE_QUAL_VOLATILE)))
 
 DEF_POINTER_TYPE (BT_PTR_LONG, BT_LONG)
+DEF_POINTER_TYPE (BT_PTR_ULONGLONG, BT_ULONGLONG)
 DEF_POINTER_TYPE (BT_PTR_PTR, BT_PTR)
 DEF_FUNCTION_TYPE_0 (BT_FN_BOOL, BT_BOOL)
 DEF_FUNCTION_TYPE_0 (BT_FN_PTR, BT_PTR)
@@ -88,6 +90,8 @@ DEF_POINTER_TYPE (BT_PTR_FN_VOID_PTR, BT
 
 DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_LONGPTR_LONGPTR,
                      BT_BOOL, BT_PTR_LONG, BT_PTR_LONG)
+DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_ULONGLONGPTR_ULONGLONGPTR,
+		     BT_BOOL, BT_PTR_ULONGLONG, BT_PTR_ULONGLONG)
 DEF_FUNCTION_TYPE_2 (BT_FN_I1_VPTR_I1, BT_I1, BT_VOLATILE_PTR, BT_I1)
 DEF_FUNCTION_TYPE_2 (BT_FN_I2_VPTR_I2, BT_I2, BT_VOLATILE_PTR, BT_I2)
 DEF_FUNCTION_TYPE_2 (BT_FN_I4_VPTR_I4, BT_I4, BT_VOLATILE_PTR, BT_I4)
@@ -131,6 +135,9 @@ DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_LONG_LON
 DEF_FUNCTION_TYPE_6 (BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG,
                      BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT,
                      BT_LONG, BT_LONG, BT_LONG)
+DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_BOOL_ULL_ULL_ULL_ULLPTR_ULLPTR,
+		     BT_BOOL, BT_BOOL, BT_ULONGLONG, BT_ULONGLONG,
+		     BT_ULONGLONG, BT_PTR_ULONGLONG, BT_PTR_ULONGLONG)
 
 DEF_FUNCTION_TYPE_7 (BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG_LONG,
                      BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT,
@@ -139,5 +146,9 @@ DEF_FUNCTION_TYPE_7 (BT_FN_VOID_OMPFN_PT
 		     BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR,
 		     BT_PTR_FN_VOID_PTR_PTR, BT_LONG, BT_LONG,
 		     BT_BOOL, BT_UINT)
+DEF_FUNCTION_TYPE_7 (BT_FN_BOOL_BOOL_ULL_ULL_ULL_ULL_ULLPTR_ULLPTR,
+		     BT_BOOL, BT_BOOL, BT_ULONGLONG, BT_ULONGLONG,
+		     BT_ULONGLONG, BT_ULONGLONG,
+		     BT_PTR_ULONGLONG, BT_PTR_ULONGLONG)
 
 DEF_FUNCTION_TYPE_VAR_0 (BT_FN_VOID_VAR, BT_VOID)
--- gcc/testsuite/gcc.dg/gomp/pr27499.c.jj	2008-05-14 08:32:08.000000000 +0200
+++ gcc/testsuite/gcc.dg/gomp/pr27499.c	2008-05-14 12:09:04.000000000 +0200
@@ -8,6 +8,6 @@ foo (void)
 {
   unsigned int i;
 #pragma omp parallel for
-  for (i = 0; i < 64; ++i)	/* { dg-warning "is unsigned" } */
+  for (i = 0; i < 64; ++i)
     bar (i);
 }
--- gcc/testsuite/g++.dg/gomp/pr27499.C.jj	2008-05-14 08:32:08.000000000 +0200
+++ gcc/testsuite/g++.dg/gomp/pr27499.C	2008-05-14 12:09:04.000000000 +0200
@@ -8,6 +8,6 @@ foo (void)
 {
   unsigned int i;
 #pragma omp for
-  for (i = 0; i < 64; ++i)	// { dg-warning "is unsigned" }
+  for (i = 0; i < 64; ++i)
     bar (i);
 }
--- gcc/testsuite/g++.dg/gomp/for-16.C.jj	2008-05-14 08:32:08.000000000 +0200
+++ gcc/testsuite/g++.dg/gomp/for-16.C	2008-05-14 12:09:04.000000000 +0200
@@ -4,7 +4,7 @@ template<typename T>
 void foo ()
 {
 #pragma omp for
-  for (unsigned int i = 0; i < 10; i++); // { dg-warning "is unsigned" }
+  for (unsigned int i = 0; i < 10; i++);
 #pragma omp for
   for (int j = 0; ; j++); // { dg-error "missing controlling predicate" }
 #pragma omp for
--- libgomp/loop_ull.c.jj	2008-05-14 12:05:34.000000000 +0200
+++ libgomp/loop_ull.c	2008-05-14 18:03:27.000000000 +0200
@@ -0,0 +1,553 @@
+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
+   Contributed by Richard Henderson <rth@redhat.com>.
+
+   This file is part of the GNU OpenMP Library (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU Lesser General Public License as published by
+   the Free Software Foundation; either version 2.1 of the License, or
+   (at your option) any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for
+   more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with libgomp; see the file COPYING.LIB.  If not, write to the
+   Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA.  */
+
+/* As a special exception, if you link this library with other files, some
+   of which are compiled with GCC, to produce an executable, this library
+   does not by itself cause the resulting executable to be covered by the
+   GNU General Public License.  This exception does not however invalidate
+   any other reasons why the executable file might be covered by the GNU
+   General Public License.  */
+
+/* This file handles the LOOP (FOR/DO) construct.  */
+
+#include <limits.h>
+#include <stdlib.h>
+#include "libgomp.h"
+
+typedef unsigned long long gomp_ull;
+
+/* Initialize the given work share construct from the given arguments.  */
+
+static inline void
+gomp_loop_ull_init (struct gomp_work_share *ws, bool up, gomp_ull start,
+		    gomp_ull end, gomp_ull incr, enum gomp_schedule_type sched,
+		    gomp_ull chunk_size)
+{
+  ws->sched = sched;
+  ws->chunk_size_ull = chunk_size;
+  /* Canonicalize loops that have zero iterations to ->next == ->end.  */
+  ws->end_ull = ((up && start > end) || (!up && start < end))
+		? start : end;
+  ws->incr_ull = incr;
+  ws->next_ull = start;
+  ws->mode = 0;
+  if (sched == GFS_DYNAMIC)
+    {
+      ws->chunk_size_ull *= incr;
+
+#if defined HAVE_SYNC_BUILTINS && defined __LP64__
+      {
+	/* For dynamic scheduling prepare things to make each iteration
+	   faster.  */
+	struct gomp_thread *thr = gomp_thread ();
+	struct gomp_team *team = thr->ts.team;
+	long nthreads = team ? team->nthreads : 1;
+
+	if (__builtin_expect (up, 1))
+	  {
+	    /* Cheap overflow protection.  */
+	    if (__builtin_expect ((nthreads | ws->chunk_size_ull)
+				  < 1ULL << (sizeof (gomp_ull)
+					     * __CHAR_BIT__ / 2 - 1), 1))
+	      ws->mode = ws->end_ull < (__LONG_LONG_MAX__ * 2ULL + 1
+					- (nthreads + 1) * ws->chunk_size_ull);
+	  }
+	/* Cheap overflow protection.  */
+	else if (__builtin_expect ((nthreads | -ws->chunk_size_ull)
+				   < 1ULL << (sizeof (gomp_ull)
+					      * __CHAR_BIT__ / 2 - 1), 1))
+	  ws->mode = ws->end_ull > ((nthreads + 1) * -ws->chunk_size_ull
+				    - (__LONG_LONG_MAX__ * 2ULL + 1));
+      }
+#endif
+    }
+  if (!up)
+    ws->mode |= 2;
+}
+
+/* The *_start routines are called when first encountering a loop construct
+   that is not bound directly to a parallel construct.  The first thread
+   that arrives will create the work-share construct; subsequent threads
+   will see the construct exists and allocate work from it.
+
+   START, END, INCR are the bounds of the loop; due to the restrictions of
+   OpenMP, these values must be the same in every thread.  This is not
+   verified (nor is it entirely verifiable, since START is not necessarily
+   retained intact in the work-share data structure).  CHUNK_SIZE is the
+   scheduling parameter; again this must be identical in all threads.
+
+   Returns true if there's any work for this thread to perform.  If so,
+   *ISTART and *IEND are filled with the bounds of the iteration block
+   allocated to this thread.  Returns false if all work was assigned to
+   other threads prior to this thread's arrival.  */
+
+static bool
+gomp_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end,
+			    gomp_ull incr, gomp_ull chunk_size,
+			    gomp_ull *istart, gomp_ull *iend)
+{
+  struct gomp_thread *thr = gomp_thread ();
+
+  thr->ts.static_trip = 0;
+  if (gomp_work_share_start (false))
+    {
+      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
+			  GFS_STATIC, chunk_size);
+      gomp_work_share_init_done ();
+    }
+
+  return !gomp_iter_ull_static_next (istart, iend);
+}
+
+static bool
+gomp_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end,
+			     gomp_ull incr, gomp_ull chunk_size,
+			     gomp_ull *istart, gomp_ull *iend)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  bool ret;
+
+  if (gomp_work_share_start (false))
+    {
+      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
+			  GFS_DYNAMIC, chunk_size);
+      gomp_work_share_init_done ();
+    }
+
+#if defined HAVE_SYNC_BUILTINS && defined __LP64__
+  ret = gomp_iter_ull_dynamic_next (istart, iend);
+#else
+  gomp_mutex_lock (&thr->ts.work_share->lock);
+  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
+  gomp_mutex_unlock (&thr->ts.work_share->lock);
+#endif
+
+  return ret;
+}
+
+static bool
+gomp_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
+			    gomp_ull incr, gomp_ull chunk_size,
+			    gomp_ull *istart, gomp_ull *iend)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  bool ret;
+
+  if (gomp_work_share_start (false))
+    {
+      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
+			  GFS_GUIDED, chunk_size);
+      gomp_work_share_init_done ();
+    }
+
+#if defined HAVE_SYNC_BUILTINS && defined __LP64__
+  ret = gomp_iter_ull_guided_next (istart, iend);
+#else
+  gomp_mutex_lock (&thr->ts.work_share->lock);
+  ret = gomp_iter_ull_guided_next_locked (istart, iend);
+  gomp_mutex_unlock (&thr->ts.work_share->lock);
+#endif
+
+  return ret;
+}
+
+bool
+GOMP_loop_ull_runtime_start (bool up, gomp_ull start, gomp_ull end,
+			     gomp_ull incr, gomp_ull *istart, gomp_ull *iend)
+{
+  struct gomp_task_icv *icv = gomp_icv (false);
+  switch (icv->run_sched_var)
+    {
+    case GFS_STATIC:
+      return gomp_loop_ull_static_start (up, start, end, incr,
+					 icv->run_sched_modifier,
+					 istart, iend);
+    case GFS_DYNAMIC:
+      return gomp_loop_ull_dynamic_start (up, start, end, incr,
+					  icv->run_sched_modifier,
+					  istart, iend);
+    case GFS_GUIDED:
+      return gomp_loop_ull_guided_start (up, start, end, incr,
+					 icv->run_sched_modifier,
+					 istart, iend);
+    default:
+      abort ();
+    }
+}
+
+/* The *_ordered_*_start routines are similar.  The only difference is that
+   this work-share construct is initialized to expect an ORDERED section.  */
+
+static bool
+gomp_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
+				    gomp_ull incr, gomp_ull chunk_size,
+				    gomp_ull *istart, gomp_ull *iend)
+{
+  struct gomp_thread *thr = gomp_thread ();
+
+  thr->ts.static_trip = 0;
+  if (gomp_work_share_start (true))
+    {
+      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
+			  GFS_STATIC, chunk_size);
+      gomp_ordered_static_init ();
+      gomp_work_share_init_done ();
+    }
+
+  return !gomp_iter_ull_static_next (istart, iend);
+}
+
+static bool
+gomp_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end,
+				     gomp_ull incr, gomp_ull chunk_size,
+				     gomp_ull *istart, gomp_ull *iend)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  bool ret;
+
+  if (gomp_work_share_start (true))
+    {
+      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
+			  GFS_DYNAMIC, chunk_size);
+      gomp_mutex_lock (&thr->ts.work_share->lock);
+      gomp_work_share_init_done ();
+    }
+  else
+    gomp_mutex_lock (&thr->ts.work_share->lock);
+
+  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
+  if (ret)
+    gomp_ordered_first ();
+  gomp_mutex_unlock (&thr->ts.work_share->lock);
+
+  return ret;
+}
+
+static bool
+gomp_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
+				    gomp_ull incr, gomp_ull chunk_size,
+				    gomp_ull *istart, gomp_ull *iend)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  bool ret;
+
+  if (gomp_work_share_start (true))
+    {
+      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
+			  GFS_GUIDED, chunk_size);
+      gomp_mutex_lock (&thr->ts.work_share->lock);
+      gomp_work_share_init_done ();
+    }
+  else
+    gomp_mutex_lock (&thr->ts.work_share->lock);
+
+  ret = gomp_iter_ull_guided_next_locked (istart, iend);
+  if (ret)
+    gomp_ordered_first ();
+  gomp_mutex_unlock (&thr->ts.work_share->lock);
+
+  return ret;
+}
+
+bool
+GOMP_loop_ull_ordered_runtime_start (bool up, gomp_ull start, gomp_ull end,
+				     gomp_ull incr, gomp_ull *istart,
+				     gomp_ull *iend)
+{
+  struct gomp_task_icv *icv = gomp_icv (false);
+  switch (icv->run_sched_var)
+    {
+    case GFS_STATIC:
+      return gomp_loop_ull_ordered_static_start (up, start, end, incr,
+						 icv->run_sched_modifier,
+						 istart, iend);
+    case GFS_DYNAMIC:
+      return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr,
+						  icv->run_sched_modifier,
+						  istart, iend);
+    case GFS_GUIDED:
+      return gomp_loop_ull_ordered_guided_start (up, start, end, incr,
+						 icv->run_sched_modifier,
+						 istart, iend);
+    default:
+      abort ();
+    }
+}
+
+/* The *_next routines are called when the thread completes processing of
+   the iteration block currently assigned to it.  If the work-share
+   construct is bound directly to a parallel construct, then the iteration
+   bounds may have been set up before the parallel, in which case this may
+   be the first iteration for the thread.
+
+   Returns true if there is work remaining to be performed; *ISTART and
+   *IEND are filled with a new iteration block.  Returns false if all work
+   has been assigned.  */
+
+static bool
+gomp_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
+{
+  return !gomp_iter_ull_static_next (istart, iend);
+}
+
+static bool
+gomp_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend)
+{
+  bool ret;
+
+#if defined HAVE_SYNC_BUILTINS && defined __LP64__
+  ret = gomp_iter_ull_dynamic_next (istart, iend);
+#else
+  struct gomp_thread *thr = gomp_thread ();
+  gomp_mutex_lock (&thr->ts.work_share->lock);
+  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
+  gomp_mutex_unlock (&thr->ts.work_share->lock);
+#endif
+
+  return ret;
+}
+
+static bool
+gomp_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend)
+{
+  bool ret;
+
+#if defined HAVE_SYNC_BUILTINS && defined __LP64__
+  ret = gomp_iter_ull_guided_next (istart, iend);
+#else
+  struct gomp_thread *thr = gomp_thread ();
+  gomp_mutex_lock (&thr->ts.work_share->lock);
+  ret = gomp_iter_ull_guided_next_locked (istart, iend);
+  gomp_mutex_unlock (&thr->ts.work_share->lock);
+#endif
+
+  return ret;
+}
+
+bool
+GOMP_loop_ull_runtime_next (gomp_ull *istart, gomp_ull *iend)
+{
+  struct gomp_thread *thr = gomp_thread ();
+
+  switch (thr->ts.work_share->sched)
+    {
+    case GFS_STATIC:
+      return gomp_loop_ull_static_next (istart, iend);
+    case GFS_DYNAMIC:
+      return gomp_loop_ull_dynamic_next (istart, iend);
+    case GFS_GUIDED:
+      return gomp_loop_ull_guided_next (istart, iend);
+    default:
+      abort ();
+    }
+}
+
+/* The *_ordered_*_next routines are called when the thread completes
+   processing of the iteration block currently assigned to it.
+
+   Returns true if there is work remaining to be performed; *ISTART and
+   *IEND are filled with a new iteration block.  Returns false if all work
+   has been assigned.  */
+
+static bool
+gomp_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  int test;
+
+  gomp_ordered_sync ();
+  gomp_mutex_lock (&thr->ts.work_share->lock);
+  test = gomp_iter_ull_static_next (istart, iend);
+  if (test >= 0)
+    gomp_ordered_static_next ();
+  gomp_mutex_unlock (&thr->ts.work_share->lock);
+
+  return test == 0;
+}
+
+static bool
+gomp_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  bool ret;
+
+  gomp_ordered_sync ();
+  gomp_mutex_lock (&thr->ts.work_share->lock);
+  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
+  if (ret)
+    gomp_ordered_next ();
+  else
+    gomp_ordered_last ();
+  gomp_mutex_unlock (&thr->ts.work_share->lock);
+
+  return ret;
+}
+
+static bool
+gomp_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  bool ret;
+
+  gomp_ordered_sync ();
+  gomp_mutex_lock (&thr->ts.work_share->lock);
+  ret = gomp_iter_ull_guided_next_locked (istart, iend);
+  if (ret)
+    gomp_ordered_next ();
+  else
+    gomp_ordered_last ();
+  gomp_mutex_unlock (&thr->ts.work_share->lock);
+
+  return ret;
+}
+
+bool
+GOMP_loop_ull_ordered_runtime_next (gomp_ull *istart, gomp_ull *iend)
+{
+  struct gomp_thread *thr = gomp_thread ();
+
+  switch (thr->ts.work_share->sched)
+    {
+    case GFS_STATIC:
+      return gomp_loop_ull_ordered_static_next (istart, iend);
+    case GFS_DYNAMIC:
+      return gomp_loop_ull_ordered_dynamic_next (istart, iend);
+    case GFS_GUIDED:
+      return gomp_loop_ull_ordered_guided_next (istart, iend);
+    default:
+      abort ();
+    }
+}
+
+/* We use static functions above so that we're sure that the "runtime"
+   function can defer to the proper routine without interposition.  We
+   export the static function with a strong alias when possible, or with
+   a wrapper function otherwise.  */
+
+#ifdef HAVE_ATTRIBUTE_ALIAS
+extern __typeof(gomp_loop_ull_static_start) GOMP_loop_ull_static_start
+	__attribute__((alias ("gomp_loop_ull_static_start")));
+extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_dynamic_start
+	__attribute__((alias ("gomp_loop_ull_dynamic_start")));
+extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_guided_start
+	__attribute__((alias ("gomp_loop_ull_guided_start")));
+
+extern __typeof(gomp_loop_ull_ordered_static_start) GOMP_loop_ull_ordered_static_start
+	__attribute__((alias ("gomp_loop_ull_ordered_static_start")));
+extern __typeof(gomp_loop_ull_ordered_dynamic_start) GOMP_loop_ull_ordered_dynamic_start
+	__attribute__((alias ("gomp_loop_ull_ordered_dynamic_start")));
+extern __typeof(gomp_loop_ull_ordered_guided_start) GOMP_loop_ull_ordered_guided_start
+	__attribute__((alias ("gomp_loop_ull_ordered_guided_start")));
+
+extern __typeof(gomp_loop_ull_static_next) GOMP_loop_ull_static_next
+	__attribute__((alias ("gomp_loop_ull_static_next")));
+extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_dynamic_next
+	__attribute__((alias ("gomp_loop_ull_dynamic_next")));
+extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_guided_next
+	__attribute__((alias ("gomp_loop_ull_guided_next")));
+
+extern __typeof(gomp_loop_ull_ordered_static_next) GOMP_loop_ull_ordered_static_next
+	__attribute__((alias ("gomp_loop_ull_ordered_static_next")));
+extern __typeof(gomp_loop_ull_ordered_dynamic_next) GOMP_loop_ull_ordered_dynamic_next
+	__attribute__((alias ("gomp_loop_ull_ordered_dynamic_next")));
+extern __typeof(gomp_loop_ull_ordered_guided_next) GOMP_loop_ull_ordered_guided_next
+	__attribute__((alias ("gomp_loop_ull_ordered_guided_next")));
+#else
+bool
+GOMP_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end,
+			    gomp_ull incr, gomp_ull chunk_size,
+			    gomp_ull *istart, gomp_ull *iend)
+{
+  return gomp_loop_ull_static_start (up, start, end, incr, chunk_size,
+				     istart, iend);
+}
+
+bool
+GOMP_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end,
+			     gomp_ull incr, gomp_ull chunk_size,
+			     gomp_ull *istart, gomp_ull *iend)
+{
+  return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size,
+				      istart, iend);
+}
+
+bool
+GOMP_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
+			    gomp_ull incr, gomp_ull chunk_size,
+			    gomp_ull *istart, gomp_ull *iend)
+{
+  return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size,
+				     istart, iend);
+}
+
+bool
+GOMP_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
+				    gomp_ull incr, gomp_ull chunk_size,
+				    gomp_ull *istart, gomp_ull *iend)
+{
+  return gomp_loop_ull_ordered_static_start (up, start, end, incr,
+					     chunk_size, istart, iend);
+}
+
+bool
+GOMP_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end,
+				     gomp_ull incr, gomp_ull chunk_size,
+				     gomp_ull *istart, gomp_ull *iend)
+{
+  return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr,
+					      chunk_size, istart, iend);
+}
+
+bool
+GOMP_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
+				    gomp_ull incr, gomp_ull chunk_size,
+				    gomp_ull *istart, gomp_ull *iend)
+{
+  return gomp_loop_ull_ordered_guided_start (up, start, end, incr,
+					     chunk_size, istart, iend);
+}
+
+bool
+GOMP_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
+{
+  return gomp_loop_ull_static_next (istart, iend);
+}
+
+bool
+GOMP_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend)
+{
+  return gomp_loop_ull_dynamic_next (istart, iend);
+}
+
+bool
+GOMP_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend)
+{
+  return gomp_loop_ull_guided_next (istart, iend);
+}
+
+bool
+GOMP_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
+{
+  return gomp_loop_ull_ordered_static_next (istart, iend);
+}
+
+bool
+GOMP_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend)
+{
+  return gomp_loop_ull_ordered_dynamic_next (istart, iend);
+}
+
+bool
+GOMP_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend)
+{
+  return gomp_loop_ull_ordered_guided_next (istart, iend);
+}
+#endif
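
For reference, a hand-written sketch of how a dynamically scheduled
unsigned long long loop would drive these entry points (illustrative
only, not actual compiler output; body (), start, end, incr and chunk
are placeholders, and the leading bool is the "up" flag selecting an
upward < loop; the GOMP_loop_ull_* calls match the prototypes added to
libgomp_g.h below):

  unsigned long long istart, iend;
  if (GOMP_loop_ull_dynamic_start (true, start, end, incr, chunk,
                                   &istart, &iend))
    do
      {
        unsigned long long i;
        for (i = istart; i < iend; i += incr)
          body (i);
      }
    while (GOMP_loop_ull_dynamic_next (&istart, &iend));
  GOMP_loop_end ();
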
--- libgomp/libgomp_g.h.jj	2008-05-14 08:39:30.000000000 +0200
+++ libgomp/libgomp_g.h	2008-05-14 12:05:34.000000000 +0200
@@ -83,6 +83,74 @@ extern void GOMP_parallel_loop_runtime_s
 extern void GOMP_loop_end (void);
 extern void GOMP_loop_end_nowait (void);
 
+/* loop_ull.c */
+
+extern bool GOMP_loop_ull_static_start (bool, unsigned long long,
+					unsigned long long,
+					unsigned long long,
+					unsigned long long,
+					unsigned long long *,
+					unsigned long long *);
+extern bool GOMP_loop_ull_dynamic_start (bool, unsigned long long,
+					 unsigned long long,
+					 unsigned long long,
+					 unsigned long long,
+					 unsigned long long *,
+					 unsigned long long *);
+extern bool GOMP_loop_ull_guided_start (bool, unsigned long long,
+					unsigned long long,
+					unsigned long long,
+					unsigned long long,
+					unsigned long long *,
+					unsigned long long *);
+extern bool GOMP_loop_ull_runtime_start (bool, unsigned long long,
+					 unsigned long long,
+					 unsigned long long,
+					 unsigned long long *,
+					 unsigned long long *);
+
+extern bool GOMP_loop_ull_ordered_static_start (bool, unsigned long long,
+						unsigned long long,
+						unsigned long long,
+						unsigned long long,
+						unsigned long long *,
+						unsigned long long *);
+extern bool GOMP_loop_ull_ordered_dynamic_start (bool, unsigned long long,
+						 unsigned long long,
+						 unsigned long long,
+						 unsigned long long,
+						 unsigned long long *,
+						 unsigned long long *);
+extern bool GOMP_loop_ull_ordered_guided_start (bool, unsigned long long,
+						unsigned long long,
+						unsigned long long,
+						unsigned long long,
+						unsigned long long *,
+						unsigned long long *);
+extern bool GOMP_loop_ull_ordered_runtime_start (bool, unsigned long long,
+						 unsigned long long,
+						 unsigned long long,
+						 unsigned long long *,
+						 unsigned long long *);
+
+extern bool GOMP_loop_ull_static_next (unsigned long long *,
+				       unsigned long long *);
+extern bool GOMP_loop_ull_dynamic_next (unsigned long long *,
+					unsigned long long *);
+extern bool GOMP_loop_ull_guided_next (unsigned long long *,
+				       unsigned long long *);
+extern bool GOMP_loop_ull_runtime_next (unsigned long long *,
+					unsigned long long *);
+
+extern bool GOMP_loop_ull_ordered_static_next (unsigned long long *,
+					       unsigned long long *);
+extern bool GOMP_loop_ull_ordered_dynamic_next (unsigned long long *,
+						unsigned long long *);
+extern bool GOMP_loop_ull_ordered_guided_next (unsigned long long *,
+					       unsigned long long *);
+extern bool GOMP_loop_ull_ordered_runtime_next (unsigned long long *,
+						unsigned long long *);
+
 /* ordered.c */
 
 extern void GOMP_ordered_start (void);
--- libgomp/Makefile.in.jj	2008-03-25 08:53:58.000000000 +0100
+++ libgomp/Makefile.in	2008-05-14 12:05:34.000000000 +0200
@@ -81,9 +81,10 @@ toolexeclibLTLIBRARIES_INSTALL = $(INSTA
 LTLIBRARIES = $(toolexeclib_LTLIBRARIES)
 libgomp_la_LIBADD =
 am_libgomp_la_OBJECTS = alloc.lo barrier.lo critical.lo env.lo \
-	error.lo iter.lo loop.lo ordered.lo parallel.lo sections.lo \
-	single.lo task.lo team.lo work.lo lock.lo mutex.lo proc.lo \
-	sem.lo bar.lo ptrlock.lo time.lo fortran.lo affinity.lo
+	error.lo iter.lo iter_ull.lo loop.lo loop_ull.lo ordered.lo \
+	parallel.lo sections.lo single.lo task.lo team.lo work.lo \
+	lock.lo mutex.lo proc.lo sem.lo bar.lo ptrlock.lo time.lo \
+	fortran.lo affinity.lo
 libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
 DEFAULT_INCLUDES = -I. -I$(srcdir) -I.
 depcomp = $(SHELL) $(top_srcdir)/../depcomp
@@ -293,8 +294,9 @@ nodist_toolexeclib_HEADERS = libgomp.spe
 libgomp_version_info = -version-info $(libtool_VERSION)
 libgomp_la_LDFLAGS = $(libgomp_version_info) $(libgomp_version_script)
 libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \
-	loop.c ordered.c parallel.c sections.c single.c task.c team.c work.c \
-	lock.c mutex.c proc.c sem.c bar.c ptrlock.c time.c fortran.c affinity.c
+	iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c single.c \
+	task.c team.c work.c lock.c mutex.c proc.c sem.c bar.c ptrlock.c \
+	time.c fortran.c affinity.c
 
 nodist_noinst_HEADERS = libgomp_f.h
 nodist_libsubinclude_HEADERS = omp.h
@@ -430,8 +432,10 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/error.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fortran.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iter.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iter_ull.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lock.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loop.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loop_ull.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mutex.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parallel.Plo@am__quote@
--- libgomp/libgomp.h.jj	2008-05-07 11:05:29.000000000 +0200
+++ libgomp/libgomp.h	2008-05-14 12:05:34.000000000 +0200
@@ -74,16 +74,27 @@ struct gomp_work_share
 
   int mode;
 
-  /* This is the chunk_size argument to the SCHEDULE clause.  */
-  long chunk_size;
-
-  /* This is the iteration end point.  If this is a SECTIONS construct, 
-     this is the number of contained sections.  */
-  long end;
-
-  /* This is the iteration step.  If this is a SECTIONS construct, this
-     is always 1.  */
-  long incr;
+  union {
+    struct {
+      /* This is the chunk_size argument to the SCHEDULE clause.  */
+      long chunk_size;
+
+      /* This is the iteration end point.  If this is a SECTIONS construct,
+	 this is the number of contained sections.  */
+      long end;
+
+      /* This is the iteration step.  If this is a SECTIONS construct, this
+	 is always 1.  */
+      long incr;
+    };
+
+    struct {
+      /* The same as above, but for the unsigned long long loop variants.  */
+      unsigned long long chunk_size_ull;
+      unsigned long long end_ull;
+      unsigned long long incr_ull;
+    };
+  };
 
   /* This is a circular queue that details which threads will be allowed
      into the ordered region and in which order.  When a thread allocates
@@ -129,6 +140,9 @@ struct gomp_work_share
        GFS_STATIC loops, this the iteration start point and never changes.  */
     long next;
 
+    /* The same, but with unsigned long long type.  */
+    unsigned long long next_ull;
+
     /* This is the returned data structure for SINGLE COPYPRIVATE.  */
     void *copyprivate;
   };
@@ -395,6 +409,22 @@ extern bool gomp_iter_dynamic_next (long
 extern bool gomp_iter_guided_next (long *, long *);
 #endif
 
+/* iter_ull.c */
+
+extern int gomp_iter_ull_static_next (unsigned long long *,
+				      unsigned long long *);
+extern bool gomp_iter_ull_dynamic_next_locked (unsigned long long *,
+					       unsigned long long *);
+extern bool gomp_iter_ull_guided_next_locked (unsigned long long *,
+					      unsigned long long *);
+
+#if defined HAVE_SYNC_BUILTINS && defined __LP64__
+extern bool gomp_iter_ull_dynamic_next (unsigned long long *,
+					unsigned long long *);
+extern bool gomp_iter_ull_guided_next (unsigned long long *,
+				       unsigned long long *);
+#endif
+
 /* ordered.c */
 
 extern void gomp_ordered_first (void);
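
A note on the gomp_work_share change above: the long and unsigned long
long bounds are placed in an anonymous union, so the two sets of fields
share storage instead of sitting side by side, and only one view is
active for a given work share (chosen by the loop's iteration type).
A minimal stand-alone illustration of the layout idea, using a
hypothetical type name:

  #include <assert.h>
  #include <stddef.h>

  struct ws_demo {
    union {
      struct { long chunk_size, end, incr; };
      struct { unsigned long long chunk_size_ull, end_ull, incr_ull; };
    };
  };

  int main (void)
  {
    /* Both views begin at the same offset within the structure.  */
    assert (offsetof (struct ws_demo, chunk_size)
            == offsetof (struct ws_demo, chunk_size_ull));
    return 0;
  }
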
--- libgomp/iter_ull.c.jj	2008-05-14 12:05:34.000000000 +0200
+++ libgomp/iter_ull.c	2008-05-14 20:41:51.000000000 +0200
@@ -0,0 +1,344 @@
+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
+   Contributed by Richard Henderson <rth@redhat.com>.
+
+   This file is part of the GNU OpenMP Library (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU Lesser General Public License as published by
+   the Free Software Foundation; either version 2.1 of the License, or
+   (at your option) any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for
+   more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with libgomp; see the file COPYING.LIB.  If not, write to the
+   Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA.  */
+
+/* As a special exception, if you link this library with other files, some
+   of which are compiled with GCC, to produce an executable, this library
+   does not by itself cause the resulting executable to be covered by the
+   GNU General Public License.  This exception does not however invalidate
+   any other reasons why the executable file might be covered by the GNU
+   General Public License.  */
+
+/* This file contains routines for managing work-share iteration, both
+   for loops and sections.  */
+
+#include "libgomp.h"
+#include <stdlib.h>
+
+typedef unsigned long long gomp_ull;
+
+/* This function implements the STATIC scheduling method.  The caller should
+   iterate *pstart <= x < *pend.  Return zero if there are more iterations
+   to perform; nonzero if not.  Return less than 0 if this thread received
+   the very last iteration.  */
+
+int
+gomp_iter_ull_static_next (gomp_ull *pstart, gomp_ull *pend)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_team *team = thr->ts.team;
+  struct gomp_work_share *ws = thr->ts.work_share;
+  unsigned long nthreads = team ? team->nthreads : 1;
+
+  if (thr->ts.static_trip == -1)
+    return -1;
+
+  /* Quick test for degenerate teams and orphaned constructs.  */
+  if (nthreads == 1)
+    {
+      *pstart = ws->next_ull;
+      *pend = ws->end_ull;
+      thr->ts.static_trip = -1;
+      return ws->next_ull == ws->end_ull;
+    }
+
+  /* We interpret chunk_size zero as "unspecified", which means that we
+     should break up the iterations such that each thread makes only one
+     trip through the outer loop.  */
+  if (ws->chunk_size_ull == 0)
+    {
+      gomp_ull n, q, i, s0, e0, s, e;
+
+      if (thr->ts.static_trip > 0)
+	return 1;
+
+      /* Compute the total number of iterations.  */
+      if (__builtin_expect (ws->mode, 0) == 0)
+	n = (ws->end_ull - ws->next_ull + ws->incr_ull - 1) / ws->incr_ull;
+      else
+	n = (ws->next_ull - ws->end_ull - ws->incr_ull - 1) / -ws->incr_ull;
+      i = thr->ts.team_id;
+
+      /* Compute the "zero-based" start and end points.  That is, as
+	 if the loop began at zero and incremented by one.  */
+      q = n / nthreads;
+      q += (q * nthreads != n);
+      s0 = q * i;
+      e0 = s0 + q;
+      if (e0 > n)
+	e0 = n;
+
+      /* Notice when no iterations are allocated for this thread.  */
+      if (s0 >= e0)
+	{
+	  thr->ts.static_trip = 1;
+	  return 1;
+	}
+
+      /* Transform these to the actual start and end numbers.  */
+      s = s0 * ws->incr_ull + ws->next_ull;
+      e = e0 * ws->incr_ull + ws->next_ull;
+
+      *pstart = s;
+      *pend = e;
+      thr->ts.static_trip = (e0 == n ? -1 : 1);
+      return 0;
+    }
+  else
+    {
+      gomp_ull n, s0, e0, i, c, s, e;
+
+      /* Otherwise, each thread gets exactly chunk_size iterations
+	 (if available) each time through the loop.  */
+
+      if (__builtin_expect (ws->mode, 0) == 0)
+	n = (ws->end_ull - ws->next_ull + ws->incr_ull - 1) / ws->incr_ull;
+      else
+	n = (ws->next_ull - ws->end_ull - ws->incr_ull - 1) / -ws->incr_ull;
+      i = thr->ts.team_id;
+      c = ws->chunk_size_ull;
+
+      /* Initial guess is a C sized chunk positioned nthreads iterations
+	 in, offset by our thread number.  */
+      s0 = (thr->ts.static_trip * (gomp_ull) nthreads + i) * c;
+      e0 = s0 + c;
+
+      /* Detect overflow.  */
+      if (s0 >= n)
+	return 1;
+      if (e0 > n)
+	e0 = n;
+
+      /* Transform these to the actual start and end numbers.  */
+      s = s0 * ws->incr_ull + ws->next_ull;
+      e = e0 * ws->incr_ull + ws->next_ull;
+
+      *pstart = s;
+      *pend = e;
+
+      if (e0 == n)
+	thr->ts.static_trip = -1;
+      else
+	thr->ts.static_trip++;
+      return 0;
+    }
+}
+
+
+/* This function implements the DYNAMIC scheduling method.  Arguments are
+   as for gomp_iter_ull_static_next.  This function must be called with
+   ws->lock held.  */
+
+bool
+gomp_iter_ull_dynamic_next_locked (gomp_ull *pstart, gomp_ull *pend)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_work_share *ws = thr->ts.work_share;
+  gomp_ull start, end, chunk, left;
+
+  start = ws->next_ull;
+  if (start == ws->end_ull)
+    return false;
+
+  chunk = ws->chunk_size_ull;
+  left = ws->end_ull - start;
+  if (__builtin_expect (ws->mode & 2, 0))
+    {
+      if (chunk < left)
+	chunk = left;
+    }
+  else
+    {
+      if (chunk > left)
+	chunk = left;
+    }
+  end = start + chunk;
+
+  ws->next_ull = end;
+  *pstart = start;
+  *pend = end;
+  return true;
+}
+
+
+#if defined HAVE_SYNC_BUILTINS && defined __LP64__
+/* Similar, but doesn't require the lock held, and uses compare-and-swap
+   instead.  Note that the only memory value that changes is ws->next_ull.  */
+
+bool
+gomp_iter_ull_dynamic_next (gomp_ull *pstart, gomp_ull *pend)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_work_share *ws = thr->ts.work_share;
+  gomp_ull start, end, nend, chunk;
+
+  end = ws->end_ull;
+  chunk = ws->chunk_size_ull;
+
+  if (__builtin_expect (ws->mode & 1, 1))
+    {
+      gomp_ull tmp = __sync_fetch_and_add (&ws->next_ull, chunk);
+      if (__builtin_expect (ws->mode & 2, 0) == 0)
+	{
+	  if (tmp >= end)
+	    return false;
+	  nend = tmp + chunk;
+	  if (nend > end)
+	    nend = end;
+	  *pstart = tmp;
+	  *pend = nend;
+	  return true;
+	}
+      else
+	{
+	  if (tmp <= end)
+	    return false;
+	  nend = tmp + chunk;
+	  if (nend < end)
+	    nend = end;
+	  *pstart = tmp;
+	  *pend = nend;
+	  return true;
+	}
+    }
+
+  start = ws->next_ull;
+  while (1)
+    {
+      gomp_ull left = end - start;
+      gomp_ull tmp;
+
+      if (start == end)
+	return false;
+
+      if (__builtin_expect (ws->mode & 2, 0))
+	{
+	  if (chunk < left)
+	    chunk = left;
+	}
+      else
+	{
+	  if (chunk > left)
+	    chunk = left;
+	}
+      nend = start + chunk;
+
+      tmp = __sync_val_compare_and_swap (&ws->next_ull, start, nend);
+      if (__builtin_expect (tmp == start, 1))
+	break;
+
+      start = tmp;
+    }
+
+  *pstart = start;
+  *pend = nend;
+  return true;
+}
+#endif /* HAVE_SYNC_BUILTINS && __LP64__ */
+
+
+/* This function implements the GUIDED scheduling method.  Arguments are
+   as for gomp_iter_ull_static_next.  This function must be called with the
+   work share lock held.  */
+
+bool
+gomp_iter_ull_guided_next_locked (gomp_ull *pstart, gomp_ull *pend)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_work_share *ws = thr->ts.work_share;
+  struct gomp_team *team = thr->ts.team;
+  gomp_ull nthreads = team ? team->nthreads : 1;
+  gomp_ull n, q;
+  gomp_ull start, end;
+
+  if (ws->next_ull == ws->end_ull)
+    return false;
+
+  start = ws->next_ull;
+  if (__builtin_expect (ws->mode, 0) == 0)
+    n = (ws->end_ull - start) / ws->incr_ull;
+  else
+    n = (start - ws->end_ull) / -ws->incr_ull;
+  q = (n + nthreads - 1) / nthreads;
+
+  if (q < ws->chunk_size_ull)
+    q = ws->chunk_size_ull;
+  if (q <= n)
+    end = start + q * ws->incr_ull;
+  else
+    end = ws->end_ull;
+
+  ws->next_ull = end;
+  *pstart = start;
+  *pend = end;
+  return true;
+}
+
+#if defined HAVE_SYNC_BUILTINS && defined __LP64__
+/* Similar, but doesn't require the lock held, and uses compare-and-swap
+   instead.  Note that the only memory value that changes is ws->next_ull.  */
+
+bool
+gomp_iter_ull_guided_next (gomp_ull *pstart, gomp_ull *pend)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_work_share *ws = thr->ts.work_share;
+  struct gomp_team *team = thr->ts.team;
+  gomp_ull nthreads = team ? team->nthreads : 1;
+  gomp_ull start, end, nend, incr;
+  gomp_ull chunk_size;
+
+  start = ws->next_ull;
+  end = ws->end_ull;
+  incr = ws->incr_ull;
+  chunk_size = ws->chunk_size_ull;
+
+  while (1)
+    {
+      gomp_ull n, q;
+      gomp_ull tmp;
+
+      if (start == end)
+	return false;
+
+      if (__builtin_expect (ws->mode, 0) == 0)
+	n = (end - start) / incr;
+      else
+	n = (start - end) / -incr;
+      q = (n + nthreads - 1) / nthreads;
+
+      if (q < chunk_size)
+	q = chunk_size;
+      if (__builtin_expect (q <= n, 1))
+	nend = start + q * incr;
+      else
+	nend = end;
+
+      tmp = __sync_val_compare_and_swap (&ws->next_ull, start, nend);
+      if (__builtin_expect (tmp == start, 1))
+	break;
+
+      start = tmp;
+    }
+
+  *pstart = start;
+  *pend = nend;
+  return true;
+}
+#endif /* HAVE_SYNC_BUILTINS && __LP64__ */
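
To make the GUIDED arithmetic above concrete, a small worked trace of
gomp_iter_ull_guided_next under the assumptions nthreads = 4,
incr_ull = 1 and chunk_size_ull = 5 (illustrative numbers only):

  remaining n = 100:  q = (100 + 3) / 4 = 25  -> take 25 iterations
  remaining n =  75:  q = ( 75 + 3) / 4 = 19  -> take 19
  remaining n =  56:  q = ( 56 + 3) / 4 = 14  -> take 14
  ...                 (chunks keep shrinking)
  remaining n =  12:  q = 3, clamped up to chunk_size_ull  -> take 5
  remaining n =   7:  q = 2, clamped up to chunk_size_ull  -> take 5
  remaining n =   2:  q = 5 > n, so nend = end  -> take the final 2

Chunk sizes thus decay towards, but never below, the requested chunk
size, with a possibly shorter final block.
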
--- libgomp/Makefile.am.jj	2008-03-21 11:16:56.000000000 +0100
+++ libgomp/Makefile.am	2008-05-14 12:05:34.000000000 +0200
@@ -30,8 +30,9 @@ libgomp_version_info = -version-info $(l
 libgomp_la_LDFLAGS = $(libgomp_version_info) $(libgomp_version_script)
 
 libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \
-	loop.c ordered.c parallel.c sections.c single.c task.c team.c work.c \
-	lock.c mutex.c proc.c sem.c bar.c ptrlock.c time.c fortran.c affinity.c
+	iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c single.c \
+	task.c team.c work.c lock.c mutex.c proc.c sem.c bar.c ptrlock.c \
+	time.c fortran.c affinity.c
 
 nodist_noinst_HEADERS = libgomp_f.h
 nodist_libsubinclude_HEADERS = omp.h
--- libgomp/env.c.jj	2008-05-14 08:39:38.000000000 +0200
+++ libgomp/env.c	2008-05-14 19:23:14.000000000 +0200
@@ -592,6 +592,10 @@ omp_set_schedule (omp_sched_t kind, int 
   switch (kind)
     {
     case omp_sched_static:
+      if (modifier < 1)
+	modifier = 0;
+      icv->run_sched_modifier = modifier;
+      break;
     case omp_sched_dynamic:
     case omp_sched_guided:
       if (modifier < 1)
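
The env.c hunk above lets a non-positive modifier for omp_sched_static
select chunk size 0, i.e. the "unspecified" static chunking where each
thread gets a single contiguous block, instead of clamping it to 1 as
is still done for dynamic and guided.  The new schedule(runtime) tests
rely on that distinction, e.g.:

  omp_set_schedule (omp_sched_static, 0);   /* static, one block per thread */
  omp_set_schedule (omp_sched_static, 3);   /* static, round-robin chunks of 3 */
  omp_set_schedule (omp_sched_dynamic, 5);  /* dynamic, chunks of 5 */
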
--- libgomp/libgomp.map.jj	2008-05-14 08:39:30.000000000 +0200
+++ libgomp/libgomp.map	2008-05-14 12:05:34.000000000 +0200
@@ -117,16 +117,12 @@ GOMP_1.0 {
 	GOMP_loop_end_nowait;
 	GOMP_loop_guided_next;
 	GOMP_loop_guided_start;
-	GOMP_loop_ordered_dynamic_first;
 	GOMP_loop_ordered_dynamic_next;
 	GOMP_loop_ordered_dynamic_start;
-	GOMP_loop_ordered_guided_first;
 	GOMP_loop_ordered_guided_next;
 	GOMP_loop_ordered_guided_start;
-	GOMP_loop_ordered_runtime_first;
 	GOMP_loop_ordered_runtime_next;
 	GOMP_loop_ordered_runtime_start;
-	GOMP_loop_ordered_static_first;
 	GOMP_loop_ordered_static_next;
 	GOMP_loop_ordered_static_start;
 	GOMP_loop_runtime_next;
@@ -155,4 +151,20 @@ GOMP_2.0 {
   global:
 	GOMP_task;
 	GOMP_taskwait;
+	GOMP_loop_ull_dynamic_next;
+	GOMP_loop_ull_dynamic_start;
+	GOMP_loop_ull_guided_next;
+	GOMP_loop_ull_guided_start;
+	GOMP_loop_ull_ordered_dynamic_next;
+	GOMP_loop_ull_ordered_dynamic_start;
+	GOMP_loop_ull_ordered_guided_next;
+	GOMP_loop_ull_ordered_guided_start;
+	GOMP_loop_ull_ordered_runtime_next;
+	GOMP_loop_ull_ordered_runtime_start;
+	GOMP_loop_ull_ordered_static_next;
+	GOMP_loop_ull_ordered_static_start;
+	GOMP_loop_ull_runtime_next;
+	GOMP_loop_ull_runtime_start;
+	GOMP_loop_ull_static_next;
+	GOMP_loop_ull_static_start;
 } GOMP_1.0;
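
(Note on the libgomp.map hunk: the new GOMP_loop_ull_* entry points go
into the GOMP_2.0 version node rather than GOMP_1.0, so binaries linked
against an older libgomp continue to resolve exactly the symbols they
were built with; the trailing "} GOMP_1.0;" is the linker version-script
syntax declaring GOMP_2.0 as the successor of the 1.0 node.)
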
--- libgomp/testsuite/libgomp.c/loop-5.c.jj	2008-05-14 22:39:33.000000000 +0200
+++ libgomp/testsuite/libgomp.c/loop-5.c	2008-05-15 16:12:35.000000000 +0200
@@ -0,0 +1,276 @@
+#include <omp.h>
+#include <stdlib.h>
+#include <string.h>
+
+int
+test1 (void)
+{
+  short int buf[64], *p;
+  int i;
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for
+  for (p = &buf[10]; p < &buf[54]; p++)
+    *p = 5;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 5 * (i >= 10 && i < 54))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for
+  for (p = &buf[3]; p <= &buf[63]; p += 2)
+    p[-2] = 6;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 6 * ((i & 1) && i <= 61))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for
+  for (p = &buf[16]; p < &buf[51]; p = 4 + p)
+    p[2] = 7;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 7 * ((i & 3) == 2 && i >= 18 && i < 53))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for
+  for (p = &buf[16]; p <= &buf[40]; p = p + 4ULL)
+    p[2] = -7;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != -7 * ((i & 3) == 2 && i >= 18 && i <= 42))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for
+  for (p = &buf[53]; p > &buf[9]; --p)
+    *p = 5;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 5 * (i >= 10 && i < 54))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for
+  for (p = &buf[63]; p >= &buf[3]; p -= 2)
+    p[-2] = 6;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 6 * ((i & 1) && i <= 61))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for
+  for (p = &buf[48]; p > &buf[15]; p = -4 + p)
+    p[2] = 7;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 7 * ((i & 3) == 2 && i >= 18 && i < 53))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for
+  for (p = &buf[40]; p >= &buf[16]; p = p - 4ULL)
+    p[2] = -7;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != -7 * ((i & 3) == 2 && i >= 18 && i <= 42))
+      abort ();
+  return 0;
+}
+
+int
+test2 (void)
+{
+  int buf[64], *p;
+  int i;
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (static, 3)
+  for (p = &buf[10]; p < &buf[54]; p++)
+    *p = 5;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 5 * (i >= 10 && i < 54))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (static, 3)
+  for (p = &buf[3]; p <= &buf[63]; p += 2)
+    p[-2] = 6;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 6 * ((i & 1) && i <= 61))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (static, 3)
+  for (p = &buf[16]; p < &buf[51]; p = 4 + p)
+    p[2] = 7;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 7 * ((i & 3) == 2 && i >= 18 && i < 53))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (static, 3)
+  for (p = &buf[16]; p <= &buf[40]; p = p + 4ULL)
+    p[2] = -7;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != -7 * ((i & 3) == 2 && i >= 18 && i <= 42))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (static, 3)
+  for (p = &buf[53]; p > &buf[9]; --p)
+    *p = 5;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 5 * (i >= 10 && i < 54))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (static, 3)
+  for (p = &buf[63]; p >= &buf[3]; p -= 2)
+    p[-2] = 6;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 6 * ((i & 1) && i <= 61))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (static, 3)
+  for (p = &buf[48]; p > &buf[15]; p = -4 + p)
+    p[2] = 7;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 7 * ((i & 3) == 2 && i >= 18 && i < 53))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (static, 3)
+  for (p = &buf[40]; p >= &buf[16]; p = p - 4ULL)
+    p[2] = -7;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != -7 * ((i & 3) == 2 && i >= 18 && i <= 42))
+      abort ();
+  return 0;
+}
+
+int
+test3 (void)
+{
+  int buf[64], *p;
+  int i;
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (dynamic, 3)
+  for (p = &buf[10]; p < &buf[54]; p++)
+    *p = 5;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 5 * (i >= 10 && i < 54))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (dynamic, 3)
+  for (p = &buf[3]; p <= &buf[63]; p += 2)
+    p[-2] = 6;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 6 * ((i & 1) && i <= 61))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (dynamic, 3)
+  for (p = &buf[16]; p < &buf[51]; p = 4 + p)
+    p[2] = 7;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 7 * ((i & 3) == 2 && i >= 18 && i < 53))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (dynamic, 3)
+  for (p = &buf[16]; p <= &buf[40]; p = p + 4ULL)
+    p[2] = -7;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != -7 * ((i & 3) == 2 && i >= 18 && i <= 42))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (dynamic, 3)
+  for (p = &buf[53]; p > &buf[9]; --p)
+    *p = 5;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 5 * (i >= 10 && i < 54))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (dynamic, 3)
+  for (p = &buf[63]; p >= &buf[3]; p -= 2)
+    p[-2] = 6;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 6 * ((i & 1) && i <= 61))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (dynamic, 3)
+  for (p = &buf[48]; p > &buf[15]; p = -4 + p)
+    p[2] = 7;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 7 * ((i & 3) == 2 && i >= 18 && i < 53))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (dynamic, 3)
+  for (p = &buf[40]; p >= &buf[16]; p = p - 4ULL)
+    p[2] = -7;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != -7 * ((i & 3) == 2 && i >= 18 && i <= 42))
+      abort ();
+  return 0;
+}
+
+int
+test4 (void)
+{
+  int buf[64], *p;
+  int i;
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (runtime)
+  for (p = &buf[10]; p < &buf[54]; p++)
+    *p = 5;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 5 * (i >= 10 && i < 54))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (runtime)
+  for (p = &buf[3]; p <= &buf[63]; p += 2)
+    p[-2] = 6;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 6 * ((i & 1) && i <= 61))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (runtime)
+  for (p = &buf[16]; p < &buf[51]; p = 4 + p)
+    p[2] = 7;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 7 * ((i & 3) == 2 && i >= 18 && i < 53))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (runtime)
+  for (p = &buf[16]; p <= &buf[40]; p = p + 4ULL)
+    p[2] = -7;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != -7 * ((i & 3) == 2 && i >= 18 && i <= 42))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (runtime)
+  for (p = &buf[53]; p > &buf[9]; --p)
+    *p = 5;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 5 * (i >= 10 && i < 54))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (runtime)
+  for (p = &buf[63]; p >= &buf[3]; p -= 2)
+    p[-2] = 6;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 6 * ((i & 1) && i <= 61))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (runtime)
+  for (p = &buf[48]; p > &buf[15]; p = -4 + p)
+    p[2] = 7;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 7 * ((i & 3) == 2 && i >= 18 && i < 53))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (runtime)
+  for (p = &buf[40]; p >= &buf[16]; p = p - 4ULL)
+    p[2] = -7;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != -7 * ((i & 3) == 2 && i >= 18 && i <= 42))
+      abort ();
+  return 0;
+}
+
+int
+main (void)
+{
+  test1 ();
+  test2 ();
+  test3 ();
+  omp_set_schedule (omp_sched_static, 0);
+  test4 ();
+  omp_set_schedule (omp_sched_static, 3);
+  test4 ();
+  omp_set_schedule (omp_sched_dynamic, 5);
+  test4 ();
+  omp_set_schedule (omp_sched_guided, 2);
+  test4 ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/loop-6.c.jj	2008-05-15 14:09:54.000000000 +0200
+++ libgomp/testsuite/libgomp.c/loop-6.c	2008-05-15 14:20:24.000000000 +0200
@@ -0,0 +1,387 @@
+/* { dg-do run } */
+
+#include <omp.h>
+
+extern void abort (void);
+
+#define LLONG_MAX __LONG_LONG_MAX__
+#define ULLONG_MAX (LLONG_MAX * 2ULL + 1)
+#define INT_MAX __INT_MAX__
+
+int arr[6 * 5];
+
+void
+set (int loopidx, int idx)
+{
+#pragma omp atomic
+  arr[loopidx * 5 + idx]++;
+}
+
+#define check(var, val, loopidx, idx) \
+  if (var == (val)) set (loopidx, idx); else
+#define test(loopidx, count) \
+  for (idx = 0; idx < 5; idx++) \
+    if (arr[loopidx * 5 + idx] != idx < count) \
+      abort (); \
+    else \
+      arr[loopidx * 5 + idx] = 0
+
+int
+test1 (void)
+{
+  int e = 0, idx;
+
+#pragma omp parallel reduction(+:e)
+  {
+    long long i;
+    unsigned long long j;
+    #pragma omp for schedule(dynamic,1) nowait
+    for (i = LLONG_MAX - 30001; i <= LLONG_MAX - 10001; i += 10000)
+      {
+	check (i, LLONG_MAX - 30001, 0, 0)
+	check (i, LLONG_MAX - 20001, 0, 1)
+	check (i, LLONG_MAX - 10001, 0, 2)
+	e = 1;
+      }
+    #pragma omp for schedule(dynamic,1) nowait
+    for (i = -LLONG_MAX + 30000; i >= -LLONG_MAX + 10000; i -= 10000)
+      {
+	check (i, -LLONG_MAX + 30000, 1, 0)
+	check (i, -LLONG_MAX + 20000, 1, 1)
+	check (i, -LLONG_MAX + 10000, 1, 2)
+	e = 1;
+      }
+    #pragma omp for schedule(dynamic,1) nowait
+    for (j = 20; j <= LLONG_MAX - 70; j += LLONG_MAX + 50ULL)
+      {
+	check (j, 20, 2, 0)
+	e = 1;
+      }
+    #pragma omp for schedule(dynamic,1) nowait
+    for (j = ULLONG_MAX - 3; j >= LLONG_MAX + 70ULL; j -= LLONG_MAX + 50ULL)
+      {
+	check (j, ULLONG_MAX - 3, 3, 0)
+	e = 1;
+      }
+    #pragma omp for schedule(dynamic,1) nowait
+    for (j = LLONG_MAX - 20000ULL; j <= LLONG_MAX + 10000ULL; j += 10000ULL)
+      {
+	check (j, LLONG_MAX - 20000ULL, 4, 0)
+	check (j, LLONG_MAX - 10000ULL, 4, 1)
+	check (j, LLONG_MAX, 4, 2)
+	check (j, LLONG_MAX + 10000ULL, 4, 3)
+	e = 1;
+      }
+    #pragma omp for schedule(dynamic,1) nowait
+    for (i = -3LL * INT_MAX - 20000LL; i <= INT_MAX + 10000LL; i += INT_MAX + 200LL)
+      {
+	check (i, -3LL * INT_MAX - 20000LL, 5, 0)
+	check (i, -2LL * INT_MAX - 20000LL + 200LL, 5, 1)
+	check (i, -INT_MAX - 20000LL + 400LL, 5, 2)
+	check (i, -20000LL + 600LL, 5, 3)
+	check (i, INT_MAX - 20000LL + 800LL, 5, 4)
+	e = 1;
+      }
+  }
+  if (e)
+    abort ();
+  test (0, 3);
+  test (1, 3);
+  test (2, 1);
+  test (3, 1);
+  test (4, 4);
+  test (5, 5);
+  return 0;
+}
+
+int
+test2 (void)
+{
+  int e = 0, idx;
+
+#pragma omp parallel reduction(+:e)
+  {
+    long long i;
+    unsigned long long j;
+    #pragma omp for schedule(guided,1) nowait
+    for (i = LLONG_MAX - 30001; i <= LLONG_MAX - 10001; i += 10000)
+      {
+	check (i, LLONG_MAX - 30001, 0, 0)
+	check (i, LLONG_MAX - 20001, 0, 1)
+	check (i, LLONG_MAX - 10001, 0, 2)
+	e = 1;
+      }
+    #pragma omp for schedule(guided,1) nowait
+    for (i = -LLONG_MAX + 30000; i >= -LLONG_MAX + 10000; i -= 10000)
+      {
+	check (i, -LLONG_MAX + 30000, 1, 0)
+	check (i, -LLONG_MAX + 20000, 1, 1)
+	check (i, -LLONG_MAX + 10000, 1, 2)
+	e = 1;
+      }
+    #pragma omp for schedule(guided,1) nowait
+    for (j = 20; j <= LLONG_MAX - 70; j += LLONG_MAX + 50ULL)
+      {
+	check (j, 20, 2, 0)
+	e = 1;
+      }
+    #pragma omp for schedule(guided,1) nowait
+    for (j = ULLONG_MAX - 3; j >= LLONG_MAX + 70ULL; j -= LLONG_MAX + 50ULL)
+      {
+	check (j, ULLONG_MAX - 3, 3, 0)
+	e = 1;
+      }
+    #pragma omp for schedule(guided,1) nowait
+    for (j = LLONG_MAX - 20000ULL; j <= LLONG_MAX + 10000ULL; j += 10000ULL)
+      {
+	check (j, LLONG_MAX - 20000ULL, 4, 0)
+	check (j, LLONG_MAX - 10000ULL, 4, 1)
+	check (j, LLONG_MAX, 4, 2)
+	check (j, LLONG_MAX + 10000ULL, 4, 3)
+	e = 1;
+      }
+    #pragma omp for schedule(guided,1) nowait
+    for (i = -3LL * INT_MAX - 20000LL; i <= INT_MAX + 10000LL; i += INT_MAX + 200LL)
+      {
+	check (i, -3LL * INT_MAX - 20000LL, 5, 0)
+	check (i, -2LL * INT_MAX - 20000LL + 200LL, 5, 1)
+	check (i, -INT_MAX - 20000LL + 400LL, 5, 2)
+	check (i, -20000LL + 600LL, 5, 3)
+	check (i, INT_MAX - 20000LL + 800LL, 5, 4)
+	e = 1;
+      }
+  }
+  if (e)
+    abort ();
+  test (0, 3);
+  test (1, 3);
+  test (2, 1);
+  test (3, 1);
+  test (4, 4);
+  test (5, 5);
+  return 0;
+}
+
+int
+test3 (void)
+{
+  int e = 0, idx;
+
+#pragma omp parallel reduction(+:e)
+  {
+    long long i;
+    unsigned long long j;
+    #pragma omp for schedule(static) nowait
+    for (i = LLONG_MAX - 30001; i <= LLONG_MAX - 10001; i += 10000)
+      {
+	check (i, LLONG_MAX - 30001, 0, 0)
+	check (i, LLONG_MAX - 20001, 0, 1)
+	check (i, LLONG_MAX - 10001, 0, 2)
+	e = 1;
+      }
+    #pragma omp for schedule(static) nowait
+    for (i = -LLONG_MAX + 30000; i >= -LLONG_MAX + 10000; i -= 10000)
+      {
+	check (i, -LLONG_MAX + 30000, 1, 0)
+	check (i, -LLONG_MAX + 20000, 1, 1)
+	check (i, -LLONG_MAX + 10000, 1, 2)
+	e = 1;
+      }
+    #pragma omp for schedule(static) nowait
+    for (j = 20; j <= LLONG_MAX - 70; j += LLONG_MAX + 50ULL)
+      {
+	check (j, 20, 2, 0)
+	e = 1;
+      }
+    #pragma omp for schedule(static) nowait
+    for (j = ULLONG_MAX - 3; j >= LLONG_MAX + 70ULL; j -= LLONG_MAX + 50ULL)
+      {
+	check (j, ULLONG_MAX - 3, 3, 0)
+	e = 1;
+      }
+    #pragma omp for schedule(static) nowait
+    for (j = LLONG_MAX - 20000ULL; j <= LLONG_MAX + 10000ULL; j += 10000ULL)
+      {
+	check (j, LLONG_MAX - 20000ULL, 4, 0)
+	check (j, LLONG_MAX - 10000ULL, 4, 1)
+	check (j, LLONG_MAX, 4, 2)
+	check (j, LLONG_MAX + 10000ULL, 4, 3)
+	e = 1;
+      }
+    #pragma omp for schedule(static) nowait
+    for (i = -3LL * INT_MAX - 20000LL; i <= INT_MAX + 10000LL; i += INT_MAX + 200LL)
+      {
+	check (i, -3LL * INT_MAX - 20000LL, 5, 0)
+	check (i, -2LL * INT_MAX - 20000LL + 200LL, 5, 1)
+	check (i, -INT_MAX - 20000LL + 400LL, 5, 2)
+	check (i, -20000LL + 600LL, 5, 3)
+	check (i, INT_MAX - 20000LL + 800LL, 5, 4)
+	e = 1;
+      }
+  }
+  if (e)
+    abort ();
+  test (0, 3);
+  test (1, 3);
+  test (2, 1);
+  test (3, 1);
+  test (4, 4);
+  test (5, 5);
+  return 0;
+}
+
+int
+test4 (void)
+{
+  int e = 0, idx;
+
+#pragma omp parallel reduction(+:e)
+  {
+    long long i;
+    unsigned long long j;
+    #pragma omp for schedule(static,1) nowait
+    for (i = LLONG_MAX - 30001; i <= LLONG_MAX - 10001; i += 10000)
+      {
+	check (i, LLONG_MAX - 30001, 0, 0)
+	check (i, LLONG_MAX - 20001, 0, 1)
+	check (i, LLONG_MAX - 10001, 0, 2)
+	e = 1;
+      }
+    #pragma omp for schedule(static,1) nowait
+    for (i = -LLONG_MAX + 30000; i >= -LLONG_MAX + 10000; i -= 10000)
+      {
+	check (i, -LLONG_MAX + 30000, 1, 0)
+	check (i, -LLONG_MAX + 20000, 1, 1)
+	check (i, -LLONG_MAX + 10000, 1, 2)
+	e = 1;
+      }
+    #pragma omp for schedule(static,1) nowait
+    for (j = 20; j <= LLONG_MAX - 70; j += LLONG_MAX + 50ULL)
+      {
+	check (j, 20, 2, 0)
+	e = 1;
+      }
+    #pragma omp for schedule(static,1) nowait
+    for (j = ULLONG_MAX - 3; j >= LLONG_MAX + 70ULL; j -= LLONG_MAX + 50ULL)
+      {
+	check (j, ULLONG_MAX - 3, 3, 0)
+	e = 1;
+      }
+    #pragma omp for schedule(static,1) nowait
+    for (j = LLONG_MAX - 20000ULL; j <= LLONG_MAX + 10000ULL; j += 10000ULL)
+      {
+	check (j, LLONG_MAX - 20000ULL, 4, 0)
+	check (j, LLONG_MAX - 10000ULL, 4, 1)
+	check (j, LLONG_MAX, 4, 2)
+	check (j, LLONG_MAX + 10000ULL, 4, 3)
+	e = 1;
+      }
+    #pragma omp for schedule(static,1) nowait
+    for (i = -3LL * INT_MAX - 20000LL; i <= INT_MAX + 10000LL; i += INT_MAX + 200LL)
+      {
+	check (i, -3LL * INT_MAX - 20000LL, 5, 0)
+	check (i, -2LL * INT_MAX - 20000LL + 200LL, 5, 1)
+	check (i, -INT_MAX - 20000LL + 400LL, 5, 2)
+	check (i, -20000LL + 600LL, 5, 3)
+	check (i, INT_MAX - 20000LL + 800LL, 5, 4)
+	e = 1;
+      }
+  }
+  if (e)
+    abort ();
+  test (0, 3);
+  test (1, 3);
+  test (2, 1);
+  test (3, 1);
+  test (4, 4);
+  test (5, 5);
+  return 0;
+}
+
+int
+test5 (void)
+{
+  int e = 0, idx;
+
+#pragma omp parallel reduction(+:e)
+  {
+    long long i;
+    unsigned long long j;
+    #pragma omp for schedule(runtime) nowait
+    for (i = LLONG_MAX - 30001; i <= LLONG_MAX - 10001; i += 10000)
+      {
+	check (i, LLONG_MAX - 30001, 0, 0)
+	check (i, LLONG_MAX - 20001, 0, 1)
+	check (i, LLONG_MAX - 10001, 0, 2)
+	e = 1;
+      }
+    #pragma omp for schedule(runtime) nowait
+    for (i = -LLONG_MAX + 30000; i >= -LLONG_MAX + 10000; i -= 10000)
+      {
+	check (i, -LLONG_MAX + 30000, 1, 0)
+	check (i, -LLONG_MAX + 20000, 1, 1)
+	check (i, -LLONG_MAX + 10000, 1, 2)
+	e = 1;
+      }
+    #pragma omp for schedule(runtime) nowait
+    for (j = 20; j <= LLONG_MAX - 70; j += LLONG_MAX + 50ULL)
+      {
+	check (j, 20, 2, 0)
+	e = 1;
+      }
+    #pragma omp for schedule(runtime) nowait
+    for (j = ULLONG_MAX - 3; j >= LLONG_MAX + 70ULL; j -= LLONG_MAX + 50ULL)
+      {
+	check (j, ULLONG_MAX - 3, 3, 0)
+	e = 1;
+      }
+    #pragma omp for schedule(runtime) nowait
+    for (j = LLONG_MAX - 20000ULL; j <= LLONG_MAX + 10000ULL; j += 10000ULL)
+      {
+	check (j, LLONG_MAX - 20000ULL, 4, 0)
+	check (j, LLONG_MAX - 10000ULL, 4, 1)
+	check (j, LLONG_MAX, 4, 2)
+	check (j, LLONG_MAX + 10000ULL, 4, 3)
+	e = 1;
+      }
+    #pragma omp for schedule(runtime) nowait
+    for (i = -3LL * INT_MAX - 20000LL; i <= INT_MAX + 10000LL; i += INT_MAX + 200LL)
+      {
+	check (i, -3LL * INT_MAX - 20000LL, 5, 0)
+	check (i, -2LL * INT_MAX - 20000LL + 200LL, 5, 1)
+	check (i, -INT_MAX - 20000LL + 400LL, 5, 2)
+	check (i, -20000LL + 600LL, 5, 3)
+	check (i, INT_MAX - 20000LL + 800LL, 5, 4)
+	e = 1;
+      }
+  }
+  if (e)
+    abort ();
+  test (0, 3);
+  test (1, 3);
+  test (2, 1);
+  test (3, 1);
+  test (4, 4);
+  test (5, 5);
+  return 0;
+}
+
+int
+main (void)
+{
+  if (2 * sizeof (int) != sizeof (long long))
+    return 0;
+  test1 ();
+  test2 ();
+  test3 ();
+  test4 ();
+  omp_set_schedule (omp_sched_static, 0);
+  test5 ();
+  omp_set_schedule (omp_sched_static, 3);
+  test5 ();
+  omp_set_schedule (omp_sched_dynamic, 5);
+  test5 ();
+  omp_set_schedule (omp_sched_guided, 2);
+  test5 ();
+  return 0;
+}
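
(The unsigned long long loops in loop-6.c are chosen so that their trip
counts are only computable with full 64-bit unsigned arithmetic.  For
example, for (j = 20; j <= LLONG_MAX - 70; j += LLONG_MAX + 50ULL)
executes exactly once: the second value would be
20 + (LLONG_MAX + 50) = LLONG_MAX + 70, which already exceeds the bound
LLONG_MAX - 70, and neither that value nor the trip count fits in a
signed long long.)
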
--- libgomp/testsuite/libgomp.c/loop-7.c.jj	2008-05-15 17:23:51.000000000 +0200
+++ libgomp/testsuite/libgomp.c/loop-7.c	2008-05-15 16:44:25.000000000 +0200
@@ -0,0 +1,105 @@
+/* { dg-do run } */
+
+#include <omp.h>
+
+extern void abort (void);
+
+#define LLONG_MAX __LONG_LONG_MAX__
+#define ULLONG_MAX (LLONG_MAX * 2ULL + 1)
+#define INT_MAX __INT_MAX__
+
+int v;
+
+int
+test1 (void)
+{
+  int e = 0, cnt = 0;
+  long long i;
+  unsigned long long j;
+  char buf[6], *p;
+
+  #pragma omp for schedule(dynamic,1) collapse(2) nowait
+  for (i = LLONG_MAX - 30001; i <= LLONG_MAX - 10001; i += 10000)
+    for (j = 20; j <= LLONG_MAX - 70; j += LLONG_MAX + 50ULL)
+      if ((i != LLONG_MAX - 30001
+	   && i != LLONG_MAX - 20001
+	   && i != LLONG_MAX - 10001)
+	  || j != 20)
+	e = 1;
+      else
+	cnt++;
+  if (e || cnt != 3)
+    abort ();
+  else
+    cnt = 0;
+
+  #pragma omp for schedule(guided,1) collapse(2) nowait
+  for (i = -LLONG_MAX + 30000; i >= -LLONG_MAX + 10000; i -= 10000)
+    for (j = ULLONG_MAX - 3; j >= LLONG_MAX + 70ULL; j -= LLONG_MAX + 50ULL)
+      if ((i != -LLONG_MAX + 30000
+	   && i != -LLONG_MAX + 20000
+	   && i != -LLONG_MAX + 10000)
+	  || j != ULLONG_MAX - 3)
+	e = 1;
+      else
+	cnt++;
+  if (e || cnt != 3)
+    abort ();
+  else
+    cnt = 0;
+
+  #pragma omp for schedule(static,1) collapse(2) nowait
+  for (i = LLONG_MAX - 30001; i <= LLONG_MAX - 10001; i += 10000)
+    for (j = 20; j <= LLONG_MAX - 70 + v; j += LLONG_MAX + 50ULL)
+      if ((i != LLONG_MAX - 30001
+	   && i != LLONG_MAX - 20001
+	   && i != LLONG_MAX - 10001)
+	  || j != 20)
+	e = 1;
+      else
+	cnt++;
+  if (e || cnt != 3)
+    abort ();
+  else
+    cnt = 0;
+
+  #pragma omp for schedule(static) collapse(2) nowait
+  for (i = -LLONG_MAX + 30000 + v; i >= -LLONG_MAX + 10000; i -= 10000)
+    for (j = ULLONG_MAX - 3; j >= LLONG_MAX + 70ULL; j -= LLONG_MAX + 50ULL)
+      if ((i != -LLONG_MAX + 30000
+	   && i != -LLONG_MAX + 20000
+	   && i != -LLONG_MAX + 10000)
+	  || j != ULLONG_MAX - 3)
+	e = 1;
+      else
+	cnt++;
+  if (e || cnt != 3)
+    abort ();
+  else
+    cnt = 0;
+
+  #pragma omp for schedule(runtime) collapse(2) nowait
+  for (i = 10; i < 30; i++)
+    for (p = buf; p <= buf + 4; p += 2)
+      if (i < 10 || i >= 30 || (p != buf && p != buf + 2 && p != buf + 4))
+	e = 1;
+      else
+	cnt++;
+  if (e || cnt != 60)
+    abort ();
+  else
+    cnt = 0;
+
+  return 0;
+}
+
+int
+main (void)
+{
+  if (2 * sizeof (int) != sizeof (long long))
+    return 0;
+  asm volatile ("" : "+r" (v));
+  omp_set_schedule (omp_sched_dynamic, 1);
+  test1 ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c++/loop-8.C.jj	2008-05-15 16:12:01.000000000 +0200
+++ libgomp/testsuite/libgomp.c++/loop-8.C	2008-05-15 16:18:19.000000000 +0200
@@ -0,0 +1,276 @@
+#include <omp.h>
+#include <stdlib.h>
+#include <string.h>
+
+int
+test1 ()
+{
+  short int buf[64], *p;
+  int i;
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for
+  for (p = &buf[10]; p < &buf[54]; p++)
+    *p = 5;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 5 * (i >= 10 && i < 54))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for
+  for (p = &buf[3]; p <= &buf[63]; p += 2)
+    p[-2] = 6;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 6 * ((i & 1) && i <= 61))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for
+  for (p = &buf[16]; p < &buf[51]; p = 4 + p)
+    p[2] = 7;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 7 * ((i & 3) == 2 && i >= 18 && i < 53))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for
+  for (p = &buf[16]; p <= &buf[40]; p = p + 4ULL)
+    p[2] = -7;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != -7 * ((i & 3) == 2 && i >= 18 && i <= 42))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for
+  for (p = &buf[53]; p > &buf[9]; --p)
+    *p = 5;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 5 * (i >= 10 && i < 54))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for
+  for (p = &buf[63]; p >= &buf[3]; p -= 2)
+    p[-2] = 6;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 6 * ((i & 1) && i <= 61))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for
+  for (p = &buf[48]; p > &buf[15]; p = -4 + p)
+    p[2] = 7;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 7 * ((i & 3) == 2 && i >= 18 && i < 53))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for
+  for (p = &buf[40]; p >= &buf[16]; p = p - 4ULL)
+    p[2] = -7;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != -7 * ((i & 3) == 2 && i >= 18 && i <= 42))
+      abort ();
+  return 0;
+}
+
+int
+test2 ()
+{
+  int buf[64], *p;
+  int i;
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (static, 3)
+  for (p = &buf[10]; p < &buf[54]; p++)
+    *p = 5;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 5 * (i >= 10 && i < 54))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (static, 3)
+  for (p = &buf[3]; p <= &buf[63]; p += 2)
+    p[-2] = 6;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 6 * ((i & 1) && i <= 61))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (static, 3)
+  for (p = &buf[16]; p < &buf[51]; p = 4 + p)
+    p[2] = 7;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 7 * ((i & 3) == 2 && i >= 18 && i < 53))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (static, 3)
+  for (p = &buf[16]; p <= &buf[40]; p = p + 4ULL)
+    p[2] = -7;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != -7 * ((i & 3) == 2 && i >= 18 && i <= 42))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (static, 3)
+  for (p = &buf[53]; p > &buf[9]; --p)
+    *p = 5;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 5 * (i >= 10 && i < 54))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (static, 3)
+  for (p = &buf[63]; p >= &buf[3]; p -= 2)
+    p[-2] = 6;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 6 * ((i & 1) && i <= 61))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (static, 3)
+  for (p = &buf[48]; p > &buf[15]; p = -4 + p)
+    p[2] = 7;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 7 * ((i & 3) == 2 && i >= 18 && i < 53))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (static, 3)
+  for (p = &buf[40]; p >= &buf[16]; p = p - 4ULL)
+    p[2] = -7;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != -7 * ((i & 3) == 2 && i >= 18 && i <= 42))
+      abort ();
+  return 0;
+}
+
+int
+test3 ()
+{
+  int buf[64], *p;
+  int i;
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (dynamic, 3)
+  for (p = &buf[10]; p < &buf[54]; p++)
+    *p = 5;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 5 * (i >= 10 && i < 54))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (dynamic, 3)
+  for (p = &buf[3]; p <= &buf[63]; p += 2)
+    p[-2] = 6;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 6 * ((i & 1) && i <= 61))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (dynamic, 3)
+  for (p = &buf[16]; p < &buf[51]; p = 4 + p)
+    p[2] = 7;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 7 * ((i & 3) == 2 && i >= 18 && i < 53))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (dynamic, 3)
+  for (p = &buf[16]; p <= &buf[40]; p = p + 4ULL)
+    p[2] = -7;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != -7 * ((i & 3) == 2 && i >= 18 && i <= 42))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (dynamic, 3)
+  for (p = &buf[53]; p > &buf[9]; --p)
+    *p = 5;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 5 * (i >= 10 && i < 54))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (dynamic, 3)
+  for (p = &buf[63]; p >= &buf[3]; p -= 2)
+    p[-2] = 6;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 6 * ((i & 1) && i <= 61))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (dynamic, 3)
+  for (p = &buf[48]; p > &buf[15]; p = -4 + p)
+    p[2] = 7;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 7 * ((i & 3) == 2 && i >= 18 && i < 53))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (dynamic, 3)
+  for (p = &buf[40]; p >= &buf[16]; p = p - 4ULL)
+    p[2] = -7;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != -7 * ((i & 3) == 2 && i >= 18 && i <= 42))
+      abort ();
+  return 0;
+}
+
+int
+test4 ()
+{
+  int buf[64], *p;
+  int i;
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (runtime)
+  for (p = &buf[10]; p < &buf[54]; p++)
+    *p = 5;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 5 * (i >= 10 && i < 54))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (runtime)
+  for (p = &buf[3]; p <= &buf[63]; p += 2)
+    p[-2] = 6;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 6 * ((i & 1) && i <= 61))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (runtime)
+  for (p = &buf[16]; p < &buf[51]; p = 4 + p)
+    p[2] = 7;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 7 * ((i & 3) == 2 && i >= 18 && i < 53))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (runtime)
+  for (p = &buf[16]; p <= &buf[40]; p = p + 4ULL)
+    p[2] = -7;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != -7 * ((i & 3) == 2 && i >= 18 && i <= 42))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (runtime)
+  for (p = &buf[53]; p > &buf[9]; --p)
+    *p = 5;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 5 * (i >= 10 && i < 54))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (runtime)
+  for (p = &buf[63]; p >= &buf[3]; p -= 2)
+    p[-2] = 6;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 6 * ((i & 1) && i <= 61))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (runtime)
+  for (p = &buf[48]; p > &buf[15]; p = -4 + p)
+    p[2] = 7;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != 7 * ((i & 3) == 2 && i >= 18 && i < 53))
+      abort ();
+  memset (buf, '\0', sizeof (buf));
+#pragma omp parallel for schedule (runtime)
+  for (p = &buf[40]; p >= &buf[16]; p = p - 4ULL)
+    p[2] = -7;
+  for (i = 0; i < 64; i++)
+    if (buf[i] != -7 * ((i & 3) == 2 && i >= 18 && i <= 42))
+      abort ();
+  return 0;
+}
+
+int
+main ()
+{
+  test1 ();
+  test2 ();
+  test3 ();
+  omp_set_schedule (omp_sched_static, 0);
+  test4 ();
+  omp_set_schedule (omp_sched_static, 3);
+  test4 ();
+  omp_set_schedule (omp_sched_dynamic, 5);
+  test4 ();
+  omp_set_schedule (omp_sched_guided, 2);
+  test4 ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c++/loop-9.C.jj	2008-05-15 16:12:04.000000000 +0200
+++ libgomp/testsuite/libgomp.c++/loop-9.C	2008-05-15 16:19:22.000000000 +0200
@@ -0,0 +1,387 @@
+// { dg-do run }
+
+#include <omp.h>
+
+extern "C" void abort ();
+
+#define LLONG_MAX __LONG_LONG_MAX__
+#define ULLONG_MAX (LLONG_MAX * 2ULL + 1)
+#define INT_MAX __INT_MAX__
+
+int arr[6 * 5];
+
+void
+set (int loopidx, int idx)
+{
+#pragma omp atomic
+  arr[loopidx * 5 + idx]++;
+}
+
+#define check(var, val, loopidx, idx) \
+  if (var == (val)) set (loopidx, idx); else
+#define test(loopidx, count) \
+  for (idx = 0; idx < 5; idx++) \
+    if (arr[loopidx * 5 + idx] != (idx < count)) \
+      abort (); \
+    else \
+      arr[loopidx * 5 + idx] = 0
+
+int
+test1 ()
+{
+  int e = 0, idx;
+
+#pragma omp parallel reduction(+:e)
+  {
+    long long i;
+    unsigned long long j;
+    #pragma omp for schedule(dynamic,1) nowait
+    for (i = LLONG_MAX - 30001; i <= LLONG_MAX - 10001; i += 10000)
+      {
+	check (i, LLONG_MAX - 30001, 0, 0)
+	check (i, LLONG_MAX - 20001, 0, 1)
+	check (i, LLONG_MAX - 10001, 0, 2)
+	e = 1;
+      }
+    #pragma omp for schedule(dynamic,1) nowait
+    for (i = -LLONG_MAX + 30000; i >= -LLONG_MAX + 10000; i -= 10000)
+      {
+	check (i, -LLONG_MAX + 30000, 1, 0)
+	check (i, -LLONG_MAX + 20000, 1, 1)
+	check (i, -LLONG_MAX + 10000, 1, 2)
+	e = 1;
+      }
+    #pragma omp for schedule(dynamic,1) nowait
+    for (j = 20; j <= LLONG_MAX - 70; j += LLONG_MAX + 50ULL)
+      {
+	check (j, 20, 2, 0)
+	e = 1;
+      }
+    #pragma omp for schedule(dynamic,1) nowait
+    for (j = ULLONG_MAX - 3; j >= LLONG_MAX + 70ULL; j -= LLONG_MAX + 50ULL)
+      {
+	check (j, ULLONG_MAX - 3, 3, 0)
+	e = 1;
+      }
+    #pragma omp for schedule(dynamic,1) nowait
+    for (j = LLONG_MAX - 20000ULL; j <= LLONG_MAX + 10000ULL; j += 10000ULL)
+      {
+	check (j, LLONG_MAX - 20000ULL, 4, 0)
+	check (j, LLONG_MAX - 10000ULL, 4, 1)
+	check (j, LLONG_MAX, 4, 2)
+	check (j, LLONG_MAX + 10000ULL, 4, 3)
+	e = 1;
+      }
+    #pragma omp for schedule(dynamic,1) nowait
+    for (i = -3LL * INT_MAX - 20000LL; i <= INT_MAX + 10000LL; i += INT_MAX + 200LL)
+      {
+	check (i, -3LL * INT_MAX - 20000LL, 5, 0)
+	check (i, -2LL * INT_MAX - 20000LL + 200LL, 5, 1)
+	check (i, -INT_MAX - 20000LL + 400LL, 5, 2)
+	check (i, -20000LL + 600LL, 5, 3)
+	check (i, INT_MAX - 20000LL + 800LL, 5, 4)
+	e = 1;
+      }
+  }
+  if (e)
+    abort ();
+  test (0, 3);
+  test (1, 3);
+  test (2, 1);
+  test (3, 1);
+  test (4, 4);
+  test (5, 5);
+  return 0;
+}
+
+int
+test2 ()
+{
+  int e = 0, idx;
+
+#pragma omp parallel reduction(+:e)
+  {
+    long long i;
+    unsigned long long j;
+    #pragma omp for schedule(guided,1) nowait
+    for (i = LLONG_MAX - 30001; i <= LLONG_MAX - 10001; i += 10000)
+      {
+	check (i, LLONG_MAX - 30001, 0, 0)
+	check (i, LLONG_MAX - 20001, 0, 1)
+	check (i, LLONG_MAX - 10001, 0, 2)
+	e = 1;
+      }
+    #pragma omp for schedule(guided,1) nowait
+    for (i = -LLONG_MAX + 30000; i >= -LLONG_MAX + 10000; i -= 10000)
+      {
+	check (i, -LLONG_MAX + 30000, 1, 0)
+	check (i, -LLONG_MAX + 20000, 1, 1)
+	check (i, -LLONG_MAX + 10000, 1, 2)
+	e = 1;
+      }
+    #pragma omp for schedule(guided,1) nowait
+    for (j = 20; j <= LLONG_MAX - 70; j += LLONG_MAX + 50ULL)
+      {
+	check (j, 20, 2, 0)
+	e = 1;
+      }
+    #pragma omp for schedule(guided,1) nowait
+    for (j = ULLONG_MAX - 3; j >= LLONG_MAX + 70ULL; j -= LLONG_MAX + 50ULL)
+      {
+	check (j, ULLONG_MAX - 3, 3, 0)
+	e = 1;
+      }
+    #pragma omp for schedule(guided,1) nowait
+    for (j = LLONG_MAX - 20000ULL; j <= LLONG_MAX + 10000ULL; j += 10000ULL)
+      {
+	check (j, LLONG_MAX - 20000ULL, 4, 0)
+	check (j, LLONG_MAX - 10000ULL, 4, 1)
+	check (j, LLONG_MAX, 4, 2)
+	check (j, LLONG_MAX + 10000ULL, 4, 3)
+	e = 1;
+      }
+    #pragma omp for schedule(guided,1) nowait
+    for (i = -3LL * INT_MAX - 20000LL; i <= INT_MAX + 10000LL; i += INT_MAX + 200LL)
+      {
+	check (i, -3LL * INT_MAX - 20000LL, 5, 0)
+	check (i, -2LL * INT_MAX - 20000LL + 200LL, 5, 1)
+	check (i, -INT_MAX - 20000LL + 400LL, 5, 2)
+	check (i, -20000LL + 600LL, 5, 3)
+	check (i, INT_MAX - 20000LL + 800LL, 5, 4)
+	e = 1;
+      }
+  }
+  if (e)
+    abort ();
+  test (0, 3);
+  test (1, 3);
+  test (2, 1);
+  test (3, 1);
+  test (4, 4);
+  test (5, 5);
+  return 0;
+}
+
+int
+test3 ()
+{
+  int e = 0, idx;
+
+#pragma omp parallel reduction(+:e)
+  {
+    long long i;
+    unsigned long long j;
+    #pragma omp for schedule(static) nowait
+    for (i = LLONG_MAX - 30001; i <= LLONG_MAX - 10001; i += 10000)
+      {
+	check (i, LLONG_MAX - 30001, 0, 0)
+	check (i, LLONG_MAX - 20001, 0, 1)
+	check (i, LLONG_MAX - 10001, 0, 2)
+	e = 1;
+      }
+    #pragma omp for schedule(static) nowait
+    for (i = -LLONG_MAX + 30000; i >= -LLONG_MAX + 10000; i -= 10000)
+      {
+	check (i, -LLONG_MAX + 30000, 1, 0)
+	check (i, -LLONG_MAX + 20000, 1, 1)
+	check (i, -LLONG_MAX + 10000, 1, 2)
+	e = 1;
+      }
+    #pragma omp for schedule(static) nowait
+    for (j = 20; j <= LLONG_MAX - 70; j += LLONG_MAX + 50ULL)
+      {
+	check (j, 20, 2, 0)
+	e = 1;
+      }
+    #pragma omp for schedule(static) nowait
+    for (j = ULLONG_MAX - 3; j >= LLONG_MAX + 70ULL; j -= LLONG_MAX + 50ULL)
+      {
+	check (j, ULLONG_MAX - 3, 3, 0)
+	e = 1;
+      }
+    #pragma omp for schedule(static) nowait
+    for (j = LLONG_MAX - 20000ULL; j <= LLONG_MAX + 10000ULL; j += 10000ULL)
+      {
+	check (j, LLONG_MAX - 20000ULL, 4, 0)
+	check (j, LLONG_MAX - 10000ULL, 4, 1)
+	check (j, LLONG_MAX, 4, 2)
+	check (j, LLONG_MAX + 10000ULL, 4, 3)
+	e = 1;
+      }
+    #pragma omp for schedule(static) nowait
+    for (i = -3LL * INT_MAX - 20000LL; i <= INT_MAX + 10000LL; i += INT_MAX + 200LL)
+      {
+	check (i, -3LL * INT_MAX - 20000LL, 5, 0)
+	check (i, -2LL * INT_MAX - 20000LL + 200LL, 5, 1)
+	check (i, -INT_MAX - 20000LL + 400LL, 5, 2)
+	check (i, -20000LL + 600LL, 5, 3)
+	check (i, INT_MAX - 20000LL + 800LL, 5, 4)
+	e = 1;
+      }
+  }
+  if (e)
+    abort ();
+  test (0, 3);
+  test (1, 3);
+  test (2, 1);
+  test (3, 1);
+  test (4, 4);
+  test (5, 5);
+  return 0;
+}
+
+int
+test4 ()
+{
+  int e = 0, idx;
+
+#pragma omp parallel reduction(+:e)
+  {
+    long long i;
+    unsigned long long j;
+    #pragma omp for schedule(static,1) nowait
+    for (i = LLONG_MAX - 30001; i <= LLONG_MAX - 10001; i += 10000)
+      {
+	check (i, LLONG_MAX - 30001, 0, 0)
+	check (i, LLONG_MAX - 20001, 0, 1)
+	check (i, LLONG_MAX - 10001, 0, 2)
+	e = 1;
+      }
+    #pragma omp for schedule(static,1) nowait
+    for (i = -LLONG_MAX + 30000; i >= -LLONG_MAX + 10000; i -= 10000)
+      {
+	check (i, -LLONG_MAX + 30000, 1, 0)
+	check (i, -LLONG_MAX + 20000, 1, 1)
+	check (i, -LLONG_MAX + 10000, 1, 2)
+	e = 1;
+      }
+    #pragma omp for schedule(static,1) nowait
+    for (j = 20; j <= LLONG_MAX - 70; j += LLONG_MAX + 50ULL)
+      {
+	check (j, 20, 2, 0)
+	e = 1;
+      }
+    #pragma omp for schedule(static,1) nowait
+    for (j = ULLONG_MAX - 3; j >= LLONG_MAX + 70ULL; j -= LLONG_MAX + 50ULL)
+      {
+	check (j, ULLONG_MAX - 3, 3, 0)
+	e = 1;
+      }
+    #pragma omp for schedule(static,1) nowait
+    for (j = LLONG_MAX - 20000ULL; j <= LLONG_MAX + 10000ULL; j += 10000ULL)
+      {
+	check (j, LLONG_MAX - 20000ULL, 4, 0)
+	check (j, LLONG_MAX - 10000ULL, 4, 1)
+	check (j, LLONG_MAX, 4, 2)
+	check (j, LLONG_MAX + 10000ULL, 4, 3)
+	e = 1;
+      }
+    #pragma omp for schedule(static,1) nowait
+    for (i = -3LL * INT_MAX - 20000LL; i <= INT_MAX + 10000LL; i += INT_MAX + 200LL)
+      {
+	check (i, -3LL * INT_MAX - 20000LL, 5, 0)
+	check (i, -2LL * INT_MAX - 20000LL + 200LL, 5, 1)
+	check (i, -INT_MAX - 20000LL + 400LL, 5, 2)
+	check (i, -20000LL + 600LL, 5, 3)
+	check (i, INT_MAX - 20000LL + 800LL, 5, 4)
+	e = 1;
+      }
+  }
+  if (e)
+    abort ();
+  test (0, 3);
+  test (1, 3);
+  test (2, 1);
+  test (3, 1);
+  test (4, 4);
+  test (5, 5);
+  return 0;
+}
+
+int
+test5 ()
+{
+  int e = 0, idx;
+
+#pragma omp parallel reduction(+:e)
+  {
+    long long i;
+    unsigned long long j;
+    #pragma omp for schedule(runtime) nowait
+    for (i = LLONG_MAX - 30001; i <= LLONG_MAX - 10001; i += 10000)
+      {
+	check (i, LLONG_MAX - 30001, 0, 0)
+	check (i, LLONG_MAX - 20001, 0, 1)
+	check (i, LLONG_MAX - 10001, 0, 2)
+	e = 1;
+      }
+    #pragma omp for schedule(runtime) nowait
+    for (i = -LLONG_MAX + 30000; i >= -LLONG_MAX + 10000; i -= 10000)
+      {
+	check (i, -LLONG_MAX + 30000, 1, 0)
+	check (i, -LLONG_MAX + 20000, 1, 1)
+	check (i, -LLONG_MAX + 10000, 1, 2)
+	e = 1;
+      }
+    #pragma omp for schedule(runtime) nowait
+    for (j = 20; j <= LLONG_MAX - 70; j += LLONG_MAX + 50ULL)
+      {
+	check (j, 20, 2, 0)
+	e = 1;
+      }
+    #pragma omp for schedule(runtime) nowait
+    for (j = ULLONG_MAX - 3; j >= LLONG_MAX + 70ULL; j -= LLONG_MAX + 50ULL)
+      {
+	check (j, ULLONG_MAX - 3, 3, 0)
+	e = 1;
+      }
+    #pragma omp for schedule(runtime) nowait
+    for (j = LLONG_MAX - 20000ULL; j <= LLONG_MAX + 10000ULL; j += 10000ULL)
+      {
+	check (j, LLONG_MAX - 20000ULL, 4, 0)
+	check (j, LLONG_MAX - 10000ULL, 4, 1)
+	check (j, LLONG_MAX, 4, 2)
+	check (j, LLONG_MAX + 10000ULL, 4, 3)
+	e = 1;
+      }
+    #pragma omp for schedule(runtime) nowait
+    for (i = -3LL * INT_MAX - 20000LL; i <= INT_MAX + 10000LL; i += INT_MAX + 200LL)
+      {
+	check (i, -3LL * INT_MAX - 20000LL, 5, 0)
+	check (i, -2LL * INT_MAX - 20000LL + 200LL, 5, 1)
+	check (i, -INT_MAX - 20000LL + 400LL, 5, 2)
+	check (i, -20000LL + 600LL, 5, 3)
+	check (i, INT_MAX - 20000LL + 800LL, 5, 4)
+	e = 1;
+      }
+  }
+  if (e)
+    abort ();
+  test (0, 3);
+  test (1, 3);
+  test (2, 1);
+  test (3, 1);
+  test (4, 4);
+  test (5, 5);
+  return 0;
+}
+
+int
+main ()
+{
+  if (2 * sizeof (int) != sizeof (long long))
+    return 0;
+  test1 ();
+  test2 ();
+  test3 ();
+  test4 ();
+  omp_set_schedule (omp_sched_static, 0);
+  test5 ();
+  omp_set_schedule (omp_sched_static, 3);
+  test5 ();
+  omp_set_schedule (omp_sched_dynamic, 5);
+  test5 ();
+  omp_set_schedule (omp_sched_guided, 2);
+  test5 ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c++/loop-10.C.jj	2008-05-15 17:24:25.000000000 +0200
+++ libgomp/testsuite/libgomp.c++/loop-10.C	2008-05-15 17:24:36.000000000 +0200
@@ -0,0 +1,105 @@
+// { dg-do run }
+
+#include <omp.h>
+
+extern "C" void abort (void);
+
+#define LLONG_MAX __LONG_LONG_MAX__
+#define ULLONG_MAX (LLONG_MAX * 2ULL + 1)
+#define INT_MAX __INT_MAX__
+
+int v;
+
+int
+test1 (void)
+{
+  int e = 0, cnt = 0;
+  long long i;
+  unsigned long long j;
+  char buf[6], *p;
+
+  #pragma omp for schedule(dynamic,1) collapse(2) nowait
+  for (i = LLONG_MAX - 30001; i <= LLONG_MAX - 10001; i += 10000)
+    for (j = 20; j <= LLONG_MAX - 70; j += LLONG_MAX + 50ULL)
+      if ((i != LLONG_MAX - 30001
+	   && i != LLONG_MAX - 20001
+	   && i != LLONG_MAX - 10001)
+	  || j != 20)
+	e = 1;
+      else
+	cnt++;
+  if (e || cnt != 3)
+    abort ();
+  else
+    cnt = 0;
+
+  #pragma omp for schedule(guided,1) collapse(2) nowait
+  for (i = -LLONG_MAX + 30000; i >= -LLONG_MAX + 10000; i -= 10000)
+    for (j = ULLONG_MAX - 3; j >= LLONG_MAX + 70ULL; j -= LLONG_MAX + 50ULL)
+      if ((i != -LLONG_MAX + 30000
+	   && i != -LLONG_MAX + 20000
+	   && i != -LLONG_MAX + 10000)
+	  || j != ULLONG_MAX - 3)
+	e = 1;
+      else
+	cnt++;
+  if (e || cnt != 3)
+    abort ();
+  else
+    cnt = 0;
+
+  #pragma omp for schedule(static,1) collapse(2) nowait
+  for (i = LLONG_MAX - 30001; i <= LLONG_MAX - 10001; i += 10000)
+    for (j = 20; j <= LLONG_MAX - 70 + v; j += LLONG_MAX + 50ULL)
+      if ((i != LLONG_MAX - 30001
+	   && i != LLONG_MAX - 20001
+	   && i != LLONG_MAX - 10001)
+	  || j != 20)
+	e = 1;
+      else
+	cnt++;
+  if (e || cnt != 3)
+    abort ();
+  else
+    cnt = 0;
+
+  #pragma omp for schedule(static) collapse(2) nowait
+  for (i = -LLONG_MAX + 30000 + v; i >= -LLONG_MAX + 10000; i -= 10000)
+    for (j = ULLONG_MAX - 3; j >= LLONG_MAX + 70ULL; j -= LLONG_MAX + 50ULL)
+      if ((i != -LLONG_MAX + 30000
+	   && i != -LLONG_MAX + 20000
+	   && i != -LLONG_MAX + 10000)
+	  || j != ULLONG_MAX - 3)
+	e = 1;
+      else
+	cnt++;
+  if (e || cnt != 3)
+    abort ();
+  else
+    cnt = 0;
+
+  #pragma omp for schedule(runtime) collapse(2) nowait
+  for (i = 10; i < 30; i++)
+    for (p = buf; p <= buf + 4; p += 2)
+      if (i < 10 || i >= 30 || (p != buf && p != buf + 2 && p != buf + 4))
+	e = 1;
+      else
+	cnt++;
+  if (e || cnt != 60)
+    abort ();
+  else
+    cnt = 0;
+
+  return 0;
+}
+
+int
+main (void)
+{
+  if (2 * sizeof (int) != sizeof (long long))
+    return 0;
+  asm volatile ("" : "+r" (v));
+  omp_set_schedule (omp_sched_dynamic, 1);
+  test1 ();
+  return 0;
+}
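
To make the new iterator forms concrete outside the testsuite, here is a
minimal user-level sketch (a hypothetical example, not part of the patch)
of the two kinds of loops the tests above exercise; it should compile
with -fopenmp:

#define ULLONG_MAX (__LONG_LONG_MAX__ * 2ULL + 1)

extern void use (unsigned long long);

void
example (int *buf, int n)
{
  unsigned long long j;
  int *p;

  /* Unsigned long long iterator whose values do not fit any signed
     type, so biasing into a signed container cannot work.  */
#pragma omp parallel for schedule(static)
  for (j = ULLONG_MAX - 64; j < ULLONG_MAX - 4; j += 4)
    use (j);

  /* Pointer iterator, newly allowed in C by OpenMP 3.0.  */
#pragma omp parallel for schedule(dynamic)
  for (p = buf; p < buf + n; p++)
    *p = 0;
}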

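For reference, the expansion of such a loop drives the new entry points
roughly as follows.  This is a sketch of the calling convention only
(assuming the unsigned long long interface mirrors the existing long
one, with a leading flag for the loop direction, since for unsigned
types the sign of the increment can't encode it), not the exact code
the compiler generates:

#include <stdbool.h>

extern bool GOMP_loop_ull_dynamic_start (bool up, unsigned long long start,
					 unsigned long long end,
					 unsigned long long incr,
					 unsigned long long chunk,
					 unsigned long long *istart,
					 unsigned long long *iend);
extern bool GOMP_loop_ull_dynamic_next (unsigned long long *istart,
					unsigned long long *iend);
extern void GOMP_loop_end_nowait (void);

extern void body (unsigned long long);

/* Sketch of the expansion of
     #pragma omp for schedule(dynamic, chunk) nowait
   for an upward-counting unsigned long long loop.  */
void
expanded_loop (unsigned long long start, unsigned long long end,
	       unsigned long long incr, unsigned long long chunk)
{
  unsigned long long istart, iend, j;

  /* Each thread repeatedly grabs a [istart, iend) chunk of the
     iteration space until none remain.  */
  if (GOMP_loop_ull_dynamic_start (true, start, end, incr, chunk,
				   &istart, &iend))
    do
      {
	for (j = istart; j < iend; j += incr)
	  body (j);
      }
    while (GOMP_loop_ull_dynamic_next (&istart, &iend));
  GOMP_loop_end_nowait ();
}
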
	Jakub

