This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[FORTRAN] Avoid allocating unused WHERE masks


It turns out that code generation of WHERE statements was worse than
I thought.  On Saturday, I claimed that the following source code:

   where (b .ne. 0)
      a = a + 10
   endwhere

generated the following internal representation:

    int *mask = malloc(5*sizeof(int));
    for (i=0; i<5; i++)
      mask[i] = (b[i] != 0);
    for (i=0; i<5; i++)
      if (mask[i])
        a[i] += 10;
    free(mask);

Using only a single mask array.  On closer inspection, at the time it
actually generated something like:

    int *mask1 = malloc(5*sizeof(int));
    int *mask2 = malloc(5*sizeof(int));
    for (i=0; i<5; i++)
    {
      int flag = (b[i] != 0);
      mask1[i] = flag;
      mask2[i] = !flag;
    }
    for (i=0; i<5; i++)
      if (mask1[i])
        a[i] += 10;
    free(mask1);
    free(mask2);


i.e. we always allocate and populate two complementary masks, even
when the second one (mask2 above) is completely unused.  Unfortunately,
GCC's tree-level and RTL optimizers are currently unable to optimize
away this second allocation, meaning that simple WHERE loops allocate
twice as much memory as they need.


The patch below fixes this by avoiding the allocation, population
and freeing of the complementary "pending mask" when it is not
used, i.e. at the end of a WHERE chain (i.e. when the last
conditional WHERE/ELSEWHERE expression isn't followed by an
ELSEWHERE).  This is signalled by passing a NULL pointer as the
NMASK argument to gfc_evaluate_where_mask.

In a related clean-up, the patch below also removes the "pmask"
argument to gfc_trans_where_2.  All callers of this function currently
pass NULL for this parameter, so removing it makes understanding the
rest of this change easier, as "pmask" is local to gfc_trans_where_2.


Tested on x86_64-unknown-linux-gnu with a boostrap including fortran,
and tested with a top-level "make -k check" with no new failures.

Ok for mainline?  WHERE statements without ELSEWHERE clauses should
now allocate 1/8th of the memory they did previously (combined with
the recent LOGICAL*4 to LOGICAL*1 patch), except for those around a
single independent assignment which now require no allocations.

SteveK, could you retry the code you mentioned to see whether the
increased memory requirements you observed have now been resolved?



2006-02-06  Roger Sayle  <roger@eyesopen.com>

	* trans-stmt.c (gfc_evaluate_where_mask): Allow the NMASK argument
	to be NULL to indicate that the not mask isn't required.
	(gfc_trans_where_2): Remove PMASK argument.  Avoid calculating the
	pending mask for the last clause of a WHERE chain. Update
	recursive call.
	(gfc_trans_forall_1): Update call to gfc_trans_where_2.
	(gfc_trans_where): Likewise.


Index: trans-stmt.c
===================================================================
*** trans-stmt.c	(revision 110625)
--- trans-stmt.c	(working copy)
*************** typedef struct forall_info
*** 69,75 ****
  }
  forall_info;

! static void gfc_trans_where_2 (gfc_code *, tree, tree, forall_info *,
                                 stmtblock_t *, temporary_list **temp);

  /* Translate a F95 label number to a LABEL_EXPR.  */
--- 69,75 ----
  }
  forall_info;

! static void gfc_trans_where_2 (gfc_code *, tree, forall_info *,
                                 stmtblock_t *, temporary_list **temp);

  /* Translate a F95 label number to a LABEL_EXPR.  */
*************** gfc_trans_forall_1 (gfc_code * code, for
*** 2526,2532 ****

  	  /* Translate WHERE or WHERE construct nested in FORALL.  */
            temp = NULL;
! 	  gfc_trans_where_2 (c, NULL, NULL, nested_forall_info, &block, &temp);

            while (temp)
              {
--- 2526,2532 ----

  	  /* Translate WHERE or WHERE construct nested in FORALL.  */
            temp = NULL;
! 	  gfc_trans_where_2 (c, NULL, nested_forall_info, &block, &temp);

            while (temp)
              {
*************** tree gfc_trans_forall (gfc_code * code)
*** 2623,2631 ****
     the nested forall.
     ME is the WHERE mask expression.
     MASK is the temporary which value is mask's value.
!    NMASK is another temporary which value is !mask.
!    TEMP records the temporary's address allocated in this function in order to
!    free them outside this function.
     MASK, NMASK and TEMP are all OUT arguments.  */

  static tree
--- 2623,2631 ----
     the nested forall.
     ME is the WHERE mask expression.
     MASK is the temporary which value is mask's value.
!    NMASK is another temporary which value is !mask, or NULL if not required.
!    TEMP records the temporary's address allocated in this function in order
!    to free them outside this function.
     MASK, NMASK and TEMP are all OUT arguments.  */

  static tree
*************** gfc_evaluate_where_mask (gfc_expr * me,
*** 2670,2687 ****
        *temp = tempo;
      }

!   /* Allocate temporary for !mask.  */
!   ntmp = allocate_temp_for_forall_nest_1 (mask_type, size, block, &ptemp2);
!
!   /* Record the temporary  in order to free it later.  */
!   if (ptemp2)
      {
!       temporary_list *tempo;
!       tempo = (temporary_list *) gfc_getmem (sizeof (temporary_list));
!       tempo->temporary = ptemp2;
!       tempo->next = *temp;
!       *temp = tempo;
      }

    /* Variable to index the temporary.  */
    count = gfc_create_var (gfc_array_index_type, "count");
--- 2670,2692 ----
        *temp = tempo;
      }

!   if (nmask)
      {
!       /* Allocate temporary for !mask.  */
!       ntmp = allocate_temp_for_forall_nest_1 (mask_type, size, block, &ptemp2);
!
!       /* Record the temporary  in order to free it later.  */
!       if (ptemp2)
! 	{
! 	  temporary_list *tempo;
! 	  tempo = (temporary_list *) gfc_getmem (sizeof (temporary_list));
! 	  tempo->temporary = ptemp2;
! 	  tempo->next = *temp;
! 	  *temp = tempo;
! 	}
      }
+   else
+     ntmp = NULL_TREE;

    /* Variable to index the temporary.  */
    count = gfc_create_var (gfc_array_index_type, "count");
*************** gfc_evaluate_where_mask (gfc_expr * me,
*** 2720,2734 ****
      }
    /* Form the expression of the temporary.  */
    lse.expr = gfc_build_array_ref (tmp, count);
-   tmpexpr = gfc_build_array_ref (ntmp, count);

    /* Use the scalar assignment to fill temporary TMP.  */
    tmp1 = gfc_trans_scalar_assign (&lse, &rse, me->ts.type);
    gfc_add_expr_to_block (&body1, tmp1);

!   /* Fill temporary NTMP.  */
!   tmp1 = build1 (TRUTH_NOT_EXPR, TREE_TYPE (lse.expr), lse.expr);
!   gfc_add_modify_expr (&body1, tmpexpr, tmp1);

   if (lss == gfc_ss_terminator)
      {
--- 2725,2742 ----
      }
    /* Form the expression of the temporary.  */
    lse.expr = gfc_build_array_ref (tmp, count);

    /* Use the scalar assignment to fill temporary TMP.  */
    tmp1 = gfc_trans_scalar_assign (&lse, &rse, me->ts.type);
    gfc_add_expr_to_block (&body1, tmp1);

!   if (nmask)
!     {
!       /* Fill temporary NTMP.  */
!       tmp1 = build1 (TRUTH_NOT_EXPR, TREE_TYPE (lse.expr), lse.expr);
!       tmpexpr = gfc_build_array_ref (ntmp, count);
!       gfc_add_modify_expr (&body1, tmpexpr, tmp1);
!     }

   if (lss == gfc_ss_terminator)
      {
*************** gfc_evaluate_where_mask (gfc_expr * me,
*** 2760,2766 ****
    gfc_add_expr_to_block (block, tmp1);

    *mask = tmp;
!   *nmask = ntmp;

    return tmp1;
  }
--- 2768,2775 ----
    gfc_add_expr_to_block (block, tmp1);

    *mask = tmp;
!   if (nmask)
!     *nmask = ntmp;

    return tmp1;
  }
*************** gfc_trans_where_assign (gfc_expr *expr1,
*** 2990,3001 ****
  /* Translate the WHERE construct or statement.
     This function can be called iteratively to translate the nested WHERE
     construct or statement.
!    MASK is the control mask, and PMASK is the pending control mask.
     TEMP records the temporary address which must be freed later.  */

  static void
! gfc_trans_where_2 (gfc_code * code, tree mask, tree pmask,
!                    forall_info * nested_forall_info, stmtblock_t * block,
                     temporary_list ** temp)
  {
    gfc_expr *expr1;
--- 2999,3010 ----
  /* Translate the WHERE construct or statement.
     This function can be called iteratively to translate the nested WHERE
     construct or statement.
!    MASK is the control mask.
     TEMP records the temporary address which must be freed later.  */

  static void
! gfc_trans_where_2 (gfc_code * code, tree mask,
! 		   forall_info * nested_forall_info, stmtblock_t * block,
                     temporary_list ** temp)
  {
    gfc_expr *expr1;
*************** gfc_trans_where_2 (gfc_code * code, tree
*** 3006,3011 ****
--- 3015,3024 ----
    tree count1, count2;
    tree mask_copy;
    int need_temp;
+   tree *tmp1_ptr;
+   tree pmask;
+
+   pmask = NULL_TREE;

    /* the WHERE statement or the WHERE construct statement.  */
    cblock = code->block;
*************** gfc_trans_where_2 (gfc_code * code, tree
*** 3014,3022 ****
        /* Has mask-expr.  */
        if (cblock->expr)
          {
            /* Ensure that the WHERE mask be evaluated only once.  */
            tmp2 = gfc_evaluate_where_mask (cblock->expr, nested_forall_info,
!                                           &tmp, &tmp1, temp, block);

            /* Set the control mask and the pending control mask.  */
            /* It's a where-stmt.  */
--- 3027,3045 ----
        /* Has mask-expr.  */
        if (cblock->expr)
          {
+ 	  /* If this is the last clause of the WHERE construct, then
+ 	     we don't need to allocate/populate/deallocate a pmask.  */
+ 	  if (! cblock->block)
+ 	    {
+ 	      tmp1 = NULL_TREE;
+ 	      tmp1_ptr = NULL;
+ 	    }
+ 	  else
+ 	    tmp1_ptr = &tmp1;
+
            /* Ensure that the WHERE mask be evaluated only once.  */
            tmp2 = gfc_evaluate_where_mask (cblock->expr, nested_forall_info,
!                                           &tmp, tmp1_ptr, temp, block);

            /* Set the control mask and the pending control mask.  */
            /* It's a where-stmt.  */
*************** gfc_trans_where_2 (gfc_code * code, tree
*** 3102,3108 ****
              case EXEC_WHERE:
                /* Ensure that MASK is not modified by next gfc_trans_where_2.  */
                mask_copy = copy_list (mask);
!               gfc_trans_where_2 (cnext, mask_copy, NULL, nested_forall_info,
                                   block, temp);
                break;

--- 3125,3131 ----
              case EXEC_WHERE:
                /* Ensure that MASK is not modified by next gfc_trans_where_2.  */
                mask_copy = copy_list (mask);
!               gfc_trans_where_2 (cnext, mask_copy, nested_forall_info,
                                   block, temp);
                break;

*************** gfc_trans_where (gfc_code * code)
*** 3311,3317 ****
    gfc_start_block (&block);
    temp = NULL;

!   gfc_trans_where_2 (code, NULL, NULL, NULL, &block, &temp);

    /* Add calls to free temporaries which were dynamically allocated.  */
    while (temp)
--- 3334,3340 ----
    gfc_start_block (&block);
    temp = NULL;

!   gfc_trans_where_2 (code, NULL, NULL, &block, &temp);

    /* Add calls to free temporaries which were dynamically allocated.  */
    while (temp)


Roger
--


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]