]> gcc.gnu.org Git - gcc.git/blame - libgfortran/generated/matmul_i4.c
re PR libfortran/21468 (vectorizing libfortran)
[gcc.git] / libgfortran / generated / matmul_i4.c
CommitLineData
6de9cd9a 1/* Implementation of the MATMUL intrinsic
4b6903ec 2 Copyright 2002, 2005 Free Software Foundation, Inc.
6de9cd9a
DN
3 Contributed by Paul Brook <paul@nowt.org>
4
883c9d4d 5This file is part of the GNU Fortran 95 runtime library (libgfortran).
6de9cd9a
DN
6
7Libgfortran is free software; you can redistribute it and/or
57dea9f6 8modify it under the terms of the GNU General Public
6de9cd9a 9License as published by the Free Software Foundation; either
57dea9f6
TM
10version 2 of the License, or (at your option) any later version.
11
12In addition to the permissions in the GNU General Public License, the
13Free Software Foundation gives you unlimited permission to link the
14compiled version of this file into combinations with other programs,
15and to distribute those combinations without any restriction coming
16from the use of this file. (The General Public License restrictions
17do apply in other respects; for example, they cover modification of
18the file, and distribution when not linked into a combine
19executable.)
6de9cd9a
DN
20
21Libgfortran is distributed in the hope that it will be useful,
22but WITHOUT ANY WARRANTY; without even the implied warranty of
23MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
57dea9f6 24GNU General Public License for more details.
6de9cd9a 25
57dea9f6
TM
26You should have received a copy of the GNU General Public
27License along with libgfortran; see the file COPYING. If not,
fe2ae685
KC
28write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
29Boston, MA 02110-1301, USA. */
6de9cd9a
DN
30
31#include "config.h"
32#include <stdlib.h>
410d3bba 33#include <string.h>
6de9cd9a
DN
34#include <assert.h>
35#include "libgfortran.h"
36
644cb69f
FXC
37#if defined (HAVE_GFC_INTEGER_4)
38
410d3bba
VL
39/* This is a C version of the following fortran pseudo-code. The key
40 point is the loop order -- we access all arrays column-first, which
41 improves the performance enough to boost galgel spec score by 50%.
42
43 DIMENSION A(M,COUNT), B(COUNT,N), C(M,N)
44 C = 0
45 DO J=1,N
46 DO K=1,COUNT
47 DO I=1,M
48 C(I,J) = C(I,J)+A(I,K)*B(K,J)
49*/
50
85206901
JB
51extern void matmul_i4 (gfc_array_i4 * const restrict retarray,
52 gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b);
7f68c75f 53export_proto(matmul_i4);
7d7b8bfe 54
6de9cd9a 55void
85206901
JB
56matmul_i4 (gfc_array_i4 * const restrict retarray,
57 gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b)
6de9cd9a 58{
85206901
JB
59 const GFC_INTEGER_4 * restrict abase;
60 const GFC_INTEGER_4 * restrict bbase;
61 GFC_INTEGER_4 * restrict dest;
410d3bba
VL
62
63 index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
64 index_type x, y, n, count, xcount, ycount;
6de9cd9a
DN
65
66 assert (GFC_DESCRIPTOR_RANK (a) == 2
67 || GFC_DESCRIPTOR_RANK (b) == 2);
883c9d4d 68
410d3bba
VL
69/* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
70
71 Either A or B (but not both) can be rank 1:
72
73 o One-dimensional argument A is implicitly treated as a row matrix
74 dimensioned [1,count], so xcount=1.
75
76 o One-dimensional argument B is implicitly treated as a column matrix
77 dimensioned [count, 1], so ycount=1.
78 */
79
883c9d4d
VL
80 if (retarray->data == NULL)
81 {
82 if (GFC_DESCRIPTOR_RANK (a) == 1)
83 {
84 retarray->dim[0].lbound = 0;
85 retarray->dim[0].ubound = b->dim[1].ubound - b->dim[1].lbound;
86 retarray->dim[0].stride = 1;
87 }
88 else if (GFC_DESCRIPTOR_RANK (b) == 1)
89 {
90 retarray->dim[0].lbound = 0;
91 retarray->dim[0].ubound = a->dim[0].ubound - a->dim[0].lbound;
92 retarray->dim[0].stride = 1;
93 }
94 else
95 {
96 retarray->dim[0].lbound = 0;
97 retarray->dim[0].ubound = a->dim[0].ubound - a->dim[0].lbound;
98 retarray->dim[0].stride = 1;
420aa7b8 99
883c9d4d
VL
100 retarray->dim[1].lbound = 0;
101 retarray->dim[1].ubound = b->dim[1].ubound - b->dim[1].lbound;
102 retarray->dim[1].stride = retarray->dim[0].ubound+1;
103 }
420aa7b8 104
07d3cebe 105 retarray->data
4b6903ec 106 = internal_malloc_size (sizeof (GFC_INTEGER_4) * size0 ((array_t *) retarray));
efd4dc1a 107 retarray->offset = 0;
883c9d4d
VL
108 }
109
6de9cd9a
DN
110 if (retarray->dim[0].stride == 0)
111 retarray->dim[0].stride = 1;
85206901
JB
112
113 /* This prevents constifying the input arguments. */
6de9cd9a
DN
114 if (a->dim[0].stride == 0)
115 a->dim[0].stride = 1;
116 if (b->dim[0].stride == 0)
117 b->dim[0].stride = 1;
118
119
120 if (GFC_DESCRIPTOR_RANK (retarray) == 1)
121 {
410d3bba
VL
122 /* One-dimensional result may be addressed in the code below
123 either as a row or a column matrix. We want both cases to
124 work. */
125 rxstride = rystride = retarray->dim[0].stride;
6de9cd9a
DN
126 }
127 else
128 {
129 rxstride = retarray->dim[0].stride;
130 rystride = retarray->dim[1].stride;
131 }
132
410d3bba 133
6de9cd9a
DN
134 if (GFC_DESCRIPTOR_RANK (a) == 1)
135 {
410d3bba
VL
136 /* Treat it as a a row matrix A[1,count]. */
137 axstride = a->dim[0].stride;
138 aystride = 1;
139
6de9cd9a 140 xcount = 1;
410d3bba 141 count = a->dim[0].ubound + 1 - a->dim[0].lbound;
6de9cd9a
DN
142 }
143 else
144 {
410d3bba
VL
145 axstride = a->dim[0].stride;
146 aystride = a->dim[1].stride;
147
6de9cd9a 148 count = a->dim[1].ubound + 1 - a->dim[1].lbound;
6de9cd9a
DN
149 xcount = a->dim[0].ubound + 1 - a->dim[0].lbound;
150 }
410d3bba
VL
151
152 assert(count == b->dim[0].ubound + 1 - b->dim[0].lbound);
153
6de9cd9a
DN
154 if (GFC_DESCRIPTOR_RANK (b) == 1)
155 {
410d3bba
VL
156 /* Treat it as a column matrix B[count,1] */
157 bxstride = b->dim[0].stride;
158
159 /* bystride should never be used for 1-dimensional b.
160 in case it is we want it to cause a segfault, rather than
161 an incorrect result. */
420aa7b8 162 bystride = 0xDEADBEEF;
6de9cd9a
DN
163 ycount = 1;
164 }
165 else
166 {
410d3bba
VL
167 bxstride = b->dim[0].stride;
168 bystride = b->dim[1].stride;
6de9cd9a
DN
169 ycount = b->dim[1].ubound + 1 - b->dim[1].lbound;
170 }
171
410d3bba
VL
172 abase = a->data;
173 bbase = b->data;
174 dest = retarray->data;
175
176 if (rxstride == 1 && axstride == 1 && bxstride == 1)
6de9cd9a 177 {
85206901
JB
178 const GFC_INTEGER_4 * restrict bbase_y;
179 GFC_INTEGER_4 * restrict dest_y;
180 const GFC_INTEGER_4 * restrict abase_n;
410d3bba
VL
181 GFC_INTEGER_4 bbase_yn;
182
ae740cce
TK
183 if (rystride == ycount)
184 memset (dest, 0, (sizeof (GFC_INTEGER_4) * size0((array_t *) retarray)));
185 else
186 {
187 for (y = 0; y < ycount; y++)
188 for (x = 0; x < xcount; x++)
189 dest[x + y*rystride] = (GFC_INTEGER_4)0;
190 }
410d3bba
VL
191
192 for (y = 0; y < ycount; y++)
193 {
194 bbase_y = bbase + y*bystride;
195 dest_y = dest + y*rystride;
196 for (n = 0; n < count; n++)
197 {
198 abase_n = abase + n*aystride;
199 bbase_yn = bbase_y[n];
200 for (x = 0; x < xcount; x++)
201 {
202 dest_y[x] += abase_n[x] * bbase_yn;
203 }
204 }
205 }
206 }
207 else
208 {
209 for (y = 0; y < ycount; y++)
210 for (x = 0; x < xcount; x++)
211 dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
212
213 for (y = 0; y < ycount; y++)
214 for (n = 0; n < count; n++)
215 for (x = 0; x < xcount; x++)
216 /* dest[x,y] += a[x,n] * b[n,y] */
217 dest[x*rxstride + y*rystride] += abase[x*axstride + n*aystride] * bbase[n*bxstride + y*bystride];
6de9cd9a
DN
218 }
219}
644cb69f
FXC
220
221#endif
This page took 0.199197 seconds and 5 git commands to generate.