]>
Commit | Line | Data |
---|---|---|
1b43b6be APB |
1 | /* Shared functions related to mangling names for the GNU compiler |
2 | for the Java(TM) language. | |
5624e564 | 3 | Copyright (C) 2001-2015 Free Software Foundation, Inc. |
1b43b6be | 4 | |
f309ff0a | 5 | This file is part of GCC. |
1b43b6be | 6 | |
f309ff0a | 7 | GCC is free software; you can redistribute it and/or modify |
1b43b6be | 8 | it under the terms of the GNU General Public License as published by |
8328d52a | 9 | the Free Software Foundation; either version 3, or (at your option) |
1b43b6be APB |
10 | any later version. |
11 | ||
f309ff0a | 12 | GCC is distributed in the hope that it will be useful, |
1b43b6be APB |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
8328d52a NC |
18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. | |
1b43b6be APB |
20 | |
21 | Java and all Java-based marks are trademarks or registered trademarks | |
22 | of Sun Microsystems, Inc. in the United States and other countries. | |
23 | The Free Software Foundation is independent of Sun Microsystems, Inc. */ | |
24 | ||
25 | /* Written by Alexandre Petit-Bianco <apbianco@cygnus.com> */ | |
26 | ||
27 | #include "config.h" | |
28 | #include "system.h" | |
4977bab6 | 29 | #include "coretypes.h" |
1b43b6be APB |
30 | #include "jcf.h" |
31 | #include "tree.h" | |
32 | #include "java-tree.h" | |
33 | #include "obstack.h" | |
718f9c0f | 34 | #include "diagnostic-core.h" |
1b43b6be | 35 | |
/* Helpers that are only defined when the assembler cannot take UTF-8
   symbol names.  Their prototypes live under the same condition as the
   definitions; declaring them unconditionally would leave a static
   function declared but never defined when HAVE_AS_UTF8 is set.  */
#ifndef HAVE_AS_UTF8
static void append_unicode_mangled_name (const char *, int);
static int unicode_mangling_length (const char *, int);
#endif

/* Obstack that receives the mangled name; it is defined elsewhere.  */
extern struct obstack *mangle_obstack;
42 | ||
/* Compare the string (STR, LENGTH), read one character at a time with
   UTF8_GET, against the NUL-terminated string NAME.  On the first
   mismatch return the difference between the character read from STR
   and the corresponding character of NAME.  If every character of NAME
   matched, return 0 when STR has been consumed exactly up to its end
   and 1 otherwise.  */
static int
utf8_cmp (const unsigned char *str, int length, const char *name)
{
  const unsigned char *limit = str + length;
  const char *expected;

  for (expected = name; *expected != '\0'; expected++)
    {
      /* UTF8_GET is defined elsewhere; it is used here as the reader
	 that yields the next character of STR and advances STR.  */
      int ch = UTF8_GET (str, limit);
      int delta = ch - *expected;

      if (delta != 0)
	return delta;
    }

  return str != limit;
}
58 | ||
/* Every C++ keyword, kept in ascending byte order because
   cxx_keyword_p bisects this table.  If you change this, be sure also
   to change the list in
   libjava/classpath/tools/gnu/classpath/tools/javah/Keywords.java.  */
static const char *const cxx_keywords[] =
{
  /* Implementation-reserved spellings.  */
  "_Complex",
  "__alignof", "__alignof__",
  "__asm", "__asm__",
  "__attribute", "__attribute__",
  "__builtin_va_arg",
  "__complex", "__complex__",
  "__const", "__const__",
  "__extension__",
  "__imag", "__imag__",
  "__inline", "__inline__",
  "__label__",
  "__null",
  "__real", "__real__",
  "__restrict", "__restrict__",
  "__signed", "__signed__",
  "__typeof", "__typeof__",
  "__volatile", "__volatile__",

  /* Ordinary keywords, grouped by initial letter.  */
  "and", "and_eq", "asm", "auto",
  "bitand", "bitor", "bool", "break",
  "case", "catch", "char", "class", "compl", "const", "const_cast",
  "continue",
  "default", "delete", "do", "double", "dynamic_cast",
  "else", "enum", "explicit", "export", "extern",
  "false", "float", "for", "friend",
  "goto",
  "if", "inline", "int",
  "long",
  "mutable",
  "namespace", "new", "not", "not_eq",
  "operator", "or", "or_eq",
  "private", "protected", "public",
  "register", "reinterpret_cast", "return",
  "short", "signed", "sizeof", "static", "static_cast", "struct",
  "switch",
  "template", "this", "throw", "true", "try", "typedef", "typeid",
  "typename", "typeof",
  "union", "unsigned", "using",
  "virtual", "void", "volatile",
  "wchar_t", "while",
  "xor", "xor_eq"
};
169 | ||
170 | /* Return true if NAME is a C++ keyword. */ | |
171 | int | |
172 | cxx_keyword_p (const char *name, int length) | |
173 | { | |
174 | int last = ARRAY_SIZE (cxx_keywords); | |
175 | int first = 0; | |
176 | int mid = (last + first) / 2; | |
177 | int old = -1; | |
178 | ||
179 | for (mid = (last + first) / 2; | |
180 | mid != old; | |
181 | old = mid, mid = (last + first) / 2) | |
182 | { | |
183 | int kwl = strlen (cxx_keywords[mid]); | |
184 | int min_length = kwl > length ? length : kwl; | |
185 | int r = utf8_cmp ((const unsigned char *) name, min_length, cxx_keywords[mid]); | |
186 | ||
187 | if (r == 0) | |
188 | { | |
189 | int i; | |
190 | /* We've found a match if all the remaining characters are `$'. */ | |
191 | for (i = min_length; i < length && name[i] == '$'; ++i) | |
192 | ; | |
193 | if (i == length) | |
194 | return 1; | |
195 | r = 1; | |
196 | } | |
197 | ||
198 | if (r < 0) | |
199 | last = mid; | |
200 | else | |
201 | first = mid; | |
202 | } | |
203 | return 0; | |
204 | } | |
205 | ||
/* If cxx_keyword_p accepts (NAME, LEN), point NAME at an alloca'd copy
   with one `$' appended and increment LEN.  NAME and LEN must be
   modifiable lvalues and are evaluated more than once.  The copy is
   not NUL-terminated.  */
#define MANGLE_CXX_KEYWORDS(NAME, LEN)                          \
  do                                                            \
    {                                                           \
      if (cxx_keyword_p ((NAME), (LEN)))                        \
        {                                                       \
          char *dollar_copy = (char *) alloca ((LEN) + 1);      \
          memcpy (dollar_copy, (NAME), (LEN));                  \
          dollar_copy[(LEN)] = '$';                             \
          (LEN)++;                                              \
          (NAME) = dollar_copy;                                 \
        }                                                       \
    }                                                           \
  while (0)
220 | ||
221 | ||
/* If the assembler doesn't support UTF8 in symbol names, some
   characters might need to be escaped.  */
224 | ||
225 | #ifndef HAVE_AS_UTF8 | |
226 | ||
227 | /* Assuming (NAME, LEN) is a Utf8-encoding string, emit the string | |
228 | appropriately mangled (with Unicode escapes if needed) to | |
229 | MANGLE_OBSTACK. Note that `java', `lang' and `Object' are used so | |
230 | frequently that they could be cached. */ | |
231 | ||
232 | void | |
0a2f0c54 | 233 | append_gpp_mangled_name (const char *name, int len) |
1b43b6be | 234 | { |
3ad1aba1 | 235 | int encoded_len, needs_escapes; |
1b43b6be APB |
236 | char buf[6]; |
237 | ||
3ad1aba1 AH |
238 | MANGLE_CXX_KEYWORDS (name, len); |
239 | ||
240 | encoded_len = unicode_mangling_length (name, len); | |
241 | needs_escapes = encoded_len > 0; | |
242 | ||
1b43b6be APB |
243 | sprintf (buf, "%d", (needs_escapes ? encoded_len : len)); |
244 | obstack_grow (mangle_obstack, buf, strlen (buf)); | |
245 | ||
246 | if (needs_escapes) | |
247 | append_unicode_mangled_name (name, len); | |
248 | else | |
249 | obstack_grow (mangle_obstack, name, len); | |
250 | } | |
251 | ||
252 | /* Assuming (NAME, LEN) is a Utf8-encoded string, emit the string | |
253 | appropriately mangled (with Unicode escapes) to MANGLE_OBSTACK. | |
254 | Characters needing an escape are encoded `__UNN_' to `__UNNNN_', in | |
de4984af | 255 | which case `__U' will be mangled `__U_'. */ |
1b43b6be APB |
256 | |
257 | static void | |
0a2f0c54 | 258 | append_unicode_mangled_name (const char *name, int len) |
1b43b6be APB |
259 | { |
260 | const unsigned char *ptr; | |
261 | const unsigned char *limit = (const unsigned char *)name + len; | |
262 | int uuU = 0; | |
263 | for (ptr = (const unsigned char *) name; ptr < limit; ) | |
264 | { | |
265 | int ch = UTF8_GET(ptr, limit); | |
266 | ||
de4984af | 267 | if ((ISALNUM (ch) && ch != 'U') || ch == '$') |
5a3a8eb1 JD |
268 | { |
269 | obstack_1grow (mangle_obstack, ch); | |
270 | uuU = 0; | |
271 | } | |
1b43b6be APB |
272 | /* Everything else needs encoding */ |
273 | else | |
274 | { | |
275 | char buf [9]; | |
276 | if (ch == '_' || ch == 'U') | |
277 | { | |
278 | /* Prepare to recognize __U */ | |
279 | if (ch == '_' && (uuU < 3)) | |
280 | { | |
281 | uuU++; | |
282 | obstack_1grow (mangle_obstack, ch); | |
283 | } | |
284 | /* We recognize __U that we wish to encode | |
285 | __U_. Finish the encoding. */ | |
286 | else if (ch == 'U' && (uuU == 2)) | |
287 | { | |
288 | uuU = 0; | |
289 | obstack_grow (mangle_obstack, "U_", 2); | |
290 | } | |
1e97aa40 APB |
291 | /* Otherwise, just reset uuU and emit the character we |
292 | have. */ | |
293 | else | |
294 | { | |
295 | uuU = 0; | |
296 | obstack_1grow (mangle_obstack, ch); | |
297 | } | |
1b43b6be APB |
298 | continue; |
299 | } | |
300 | sprintf (buf, "__U%x_", ch); | |
301 | obstack_grow (mangle_obstack, buf, strlen (buf)); | |
302 | uuU = 0; | |
303 | } | |
304 | } | |
305 | } | |
306 | ||
/* Assuming (NAME, LEN) is a Utf8-encoded string, compute how many
   characters its mangled form (a la g++, including Unicode escapes)
   occupies.  Return that count when at least one escape is required
   and 0 when the name can be emitted unchanged.  */

static int
unicode_mangling_length (const char *name, int len)
{
  const unsigned char *cursor = (const unsigned char *) name;
  const unsigned char *limit = cursor + len;
  int escaped = 0;	/* Set once any escape has been counted.  */
  int total = 0;	/* Characters in the mangled name so far.  */
  int underscores = 0;	/* Consecutive `_' just seen, to spot `__U'.  */

  while (cursor < limit)
    {
      int ch = UTF8_GET (cursor, limit);
      int hex_digits;

      if (ch < 0)
	error ("internal error - invalid Utf8 name");

      /* Plain characters are copied through one for one.  */
      if (ch == '$' || (ch != 'U' && ISALNUM (ch)))
	{
	  total++;
	  underscores = 0;
	  continue;
	}

      if (ch == '_' || ch == 'U')
	{
	  /* Either of these is at least one character.  */
	  total++;

	  if (ch == '_' && underscores < 3)
	    underscores++;
	  else
	    {
	      /* A `U' right after exactly two `_' is written as `U_',
		 which costs one more character and counts as an
		 escape.  */
	      if (ch == 'U' && underscores == 2)
		{
		  total++;
		  escaped = 1;
		}
	      underscores = 0;
	    }
	  continue;
	}

      /* Anything else is counted as `__U', two to four hex digits
	 and a closing `_'.  */
      if (ch > 0xfff)
	hex_digits = 4;
      else if (ch > 0xff)
	hex_digits = 3;
      else
	hex_digits = 2;

      total += 4 + hex_digits;
      escaped = 1;
      underscores = 0;
    }

  return escaped ? total : 0;
}
374 | ||
375 | #else | |
376 | ||
377 | /* The assembler supports UTF8, we don't use escapes. Mangling is | |
378 | simply <N>NAME. <N> is the number of UTF8 encoded characters that | |
379 | are found in NAME. Note that `java', `lang' and `Object' are used | |
380 | so frequently that they could be cached. */ | |
381 | ||
382 | void | |
0a2f0c54 | 383 | append_gpp_mangled_name (const char *name, int len) |
1b43b6be APB |
384 | { |
385 | const unsigned char *ptr; | |
3ad1aba1 | 386 | const unsigned char *limit; |
1b43b6be APB |
387 | int encoded_len; |
388 | char buf [6]; | |
389 | ||
3ad1aba1 AH |
390 | MANGLE_CXX_KEYWORDS (name, len); |
391 | ||
392 | limit = (const unsigned char *)name + len; | |
393 | ||
1b43b6be APB |
394 | /* Compute the length of the string we wish to mangle. */ |
395 | for (encoded_len = 0, ptr = (const unsigned char *) name; | |
396 | ptr < limit; encoded_len++) | |
397 | { | |
398 | int ch = UTF8_GET(ptr, limit); | |
399 | ||
400 | if (ch < 0) | |
401 | error ("internal error - invalid Utf8 name"); | |
402 | } | |
403 | ||
404 | sprintf (buf, "%d", encoded_len); | |
405 | obstack_grow (mangle_obstack, buf, strlen (buf)); | |
406 | obstack_grow (mangle_obstack, name, len); | |
407 | } | |
408 | ||
409 | #endif /* HAVE_AS_UTF8 */ |