]> gcc.gnu.org Git - gcc.git/blame - gcc/mbchar.c
configure.in (enable_c_mbchar): New configure option.
[gcc.git] / gcc / mbchar.c
CommitLineData
56f48ce9
DB
1/* Multibyte Character Functions.
2 Copyright (C) 1998 Free Software Foundation, Inc.
3
4This file is part of GNU CC.
5
6GNU CC is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
8the Free Software Foundation; either version 2, or (at your option)
9any later version.
10
11GNU CC is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU CC; see the file COPYING. If not, write to
18the Free Software Foundation, 59 Temple Place - Suite 330,
19Boston, MA 02111-1307, USA. */
20
21/* These functions are used to manipulate multibyte characters. */
22
23/* Note regarding cross compilation:
24
25 In general translation of multibyte characters to wide characters can
26 only work in a native compiler since the translation function (mbtowc)
27 needs to know about both the source and target character encoding. However,
28 this particular implementation for JIS, SJIS and EUCJP source characters
29 will work for any compiler with a newlib target. Other targets may also
30 work provided that their wchar_t implementation is 2 bytes and the encoding
31 leaves the source character values unchanged (except for removing the
32 state shifting markers). */
33
34#ifdef MULTIBYTE_CHARS
35#include "config.h"
36#include "system.h"
37#include "gansidecl.h"
38#include "mbchar.h"
39#include <locale.h>
40
/* Classification of a single input byte while scanning JIS-encoded text.
   The values index the columns of JIS_state_table and JIS_action_table,
   so their order must match the column order of those tables.  */
typedef enum
{
  ESCAPE,	/* the JIS escape character (JIS_ESC_CHAR) */
  DOLLAR,	/* '$' */
  BRACKET,	/* '(' */
  AT,		/* '@' */
  B,		/* 'B' */
  J,		/* 'J' */
  NUL,		/* '\0' */
  JIS_CHAR,	/* any other byte accepted by ISJIS */
  OTHER,	/* everything else */
  JIS_C_NUM	/* number of character classes; not a class itself */
} JIS_CHAR_TYPE;
45
/* States of the JIS decoder.  The values index the rows of
   JIS_state_table and JIS_action_table, so their order must match the
   row order of those tables.  */
typedef enum
{
  ASCII,	/* ASCII mode, no escape sequence pending (initial state) */
  A_ESC,	/* ASCII mode, ESC seen */
  A_ESC_DL,	/* ASCII mode, ESC '$' seen */
  JIS,		/* JIS mode, at the start of a two-byte character */
  JIS_1,	/* JIS mode, first byte of a character seen */
  JIS_2,	/* JIS mode, both bytes of a character seen */
  J_ESC,	/* JIS mode, ESC seen at the start of a character */
  J_ESC_BR,	/* JIS mode, ESC '(' seen at the start of a character */
  J2_ESC,	/* JIS mode, ESC seen after a complete character */
  J2_ESC_BR,	/* JIS mode, ESC '(' seen after a complete character */
  INV,		/* invalid input */
  JIS_S_NUM	/* number of states; not a state itself */
} JIS_STATE;
51
/* Actions the JIS decoder performs after classifying an input byte.  */
typedef enum
{
  COPYA,	/* emit one single-byte character */
  COPYJ,	/* emit a two-byte character; all scanned bytes are consumed */
  COPYJ2,	/* emit a two-byte character; the current byte is not consumed */
  MAKE_A,	/* skip past the escape sequence just completed */
  MAKE_J,	/* skip past the escape sequence just completed */
  NOOP,		/* nothing to emit yet; keep scanning */
  EMPTY,	/* emit the terminating null character */
  ERROR		/* invalid sequence */
} JIS_ACTION;
56
57/*****************************************************************************
58 * state/action tables for processing JIS encoding
59 * Where possible, switches to JIS are grouped with proceding JIS characters
60 * and switches to ASCII are grouped with preceding JIS characters.
61 * Thus, maximum returned length is:
62 * 2 (switch to JIS) + 2 (JIS characters) + 2 (switch back to ASCII) = 6.
63 *****************************************************************************/
64static JIS_STATE JIS_state_table[JIS_S_NUM][JIS_C_NUM] = {
65/* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER*/
66/*ASCII*/ { A_ESC, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,ASCII,ASCII},
67/*A_ESC*/ { ASCII, A_ESC_DL,ASCII, ASCII, ASCII, ASCII, ASCII,ASCII,ASCII},
68/*A_ESC_DL*/{ ASCII, ASCII, ASCII, JIS, JIS, ASCII, ASCII,ASCII,ASCII},
69/*JIS*/ { J_ESC, JIS_1, JIS_1, JIS_1, JIS_1, JIS_1, INV, JIS_1,INV },
70/*JIS_1*/ { INV, JIS_2, JIS_2, JIS_2, JIS_2, JIS_2, INV, JIS_2,INV },
71/*JIS_2*/ { J2_ESC,JIS, JIS, JIS, JIS, JIS, INV, JIS, JIS },
72/*J_ESC*/ { INV, INV, J_ESC_BR, INV, INV, INV, INV, INV, INV },
73/*J_ESC_BR*/{ INV, INV, INV, INV, ASCII, ASCII, INV, INV, INV },
74/*J2_ESC*/ { INV, INV, J2_ESC_BR,INV, INV, INV, INV, INV, INV },
75/*J2_ESC_BR*/{INV, INV, INV, INV, ASCII, ASCII, INV, INV, INV },
76};
77
78static JIS_ACTION JIS_action_table[JIS_S_NUM][JIS_C_NUM] = {
79/* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */
80/*ASCII */ {NOOP, COPYA, COPYA, COPYA, COPYA, COPYA, EMPTY, COPYA, COPYA},
81/*A_ESC */ {COPYA, NOOP, COPYA, COPYA, COPYA, COPYA, COPYA, COPYA, COPYA},
82/*A_ESC_DL */{COPYA, COPYA, COPYA, MAKE_J, MAKE_J, COPYA, COPYA, COPYA, COPYA},
83/*JIS */ {NOOP, NOOP, NOOP, NOOP, NOOP, NOOP, ERROR, NOOP, ERROR },
84/*JIS_1 */ {ERROR, NOOP, NOOP, NOOP, NOOP, NOOP, ERROR, NOOP, ERROR },
85/*JIS_2 */ {NOOP, COPYJ2,COPYJ2,COPYJ2, COPYJ2, COPYJ2,ERROR, COPYJ2,COPYJ2},
86/*J_ESC */ {ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR },
87/*J_ESC_BR */{ERROR, ERROR, ERROR, ERROR, NOOP, NOOP, ERROR, ERROR, ERROR },
88/*J2_ESC */ {ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR },
89/*J2_ESC_BR*/{ERROR, ERROR, ERROR, ERROR, COPYJ, COPYJ, ERROR, ERROR, ERROR },
90};
91
92
/* Name of the codeset used to decode multibyte characters.  The functions
   in this file recognize "C-SJIS", "C-EUCJP" and "C-JIS"; a null pointer,
   a name of at most one character, or any other name selects the default
   (non-Japanese) handling.  */
char *literal_codeset = NULL;
94
95int
96local_mbtowc (pwc, s, n)
97 wchar_t *pwc;
98 const char *s;
99 size_t n;
100{
101 static JIS_STATE save_state = ASCII;
102 JIS_STATE curr_state = save_state;
103 unsigned char *t = (unsigned char *)s;
104
105 if (s != NULL && n == 0)
106 return -1;
107
108 if (literal_codeset == NULL || strlen (literal_codeset) <= 1)
109 {
110 /* This must be the "C" locale or unknown locale -- fall thru */
111 }
112 else if (! strcmp (literal_codeset, "C-SJIS"))
113 {
114 int char1;
115 if (s == NULL)
116 return 0; /* not state-dependent */
117 char1 = *t;
118 if (ISSJIS1 (char1))
119 {
120 int char2 = t[1];
121 if (n <= 1)
122 return -1;
123 if (ISSJIS2 (char2))
124 {
125 if (pwc != NULL)
126 *pwc = (((wchar_t)*t) << 8) + (wchar_t)(*(t+1));
127 return 2;
128 }
129 return -1;
130 }
131 if (pwc != NULL)
132 *pwc = (wchar_t)*t;
133 if (*t == '\0')
134 return 0;
135 return 1;
136 }
137 else if (! strcmp (literal_codeset, "C-EUCJP"))
138 {
139 int char1;
140 if (s == NULL)
141 return 0; /* not state-dependent */
142 char1 = *t;
143 if (ISEUCJP (char1))
144 {
145 int char2 = t[1];
146 if (n <= 1)
147 return -1;
148 if (ISEUCJP (char2))
149 {
150 if (pwc != NULL)
151 *pwc = (((wchar_t)*t) << 8) + (wchar_t)(*(t+1));
152 return 2;
153 }
154 return -1;
155 }
156 if (pwc != NULL)
157 *pwc = (wchar_t)*t;
158 if (*t == '\0')
159 return 0;
160 return 1;
161 }
162 else if (! strcmp (literal_codeset, "C-JIS"))
163 {
164 JIS_ACTION action;
165 JIS_CHAR_TYPE ch;
166 unsigned char *ptr;
167 int i, curr_ch;
168
169 if (s == NULL)
170 {
171 save_state = ASCII;
172 return 1; /* state-dependent */
173 }
174
175 ptr = t;
176
177 for (i = 0; i < n; ++i)
178 {
179 curr_ch = t[i];
180 switch (curr_ch)
181 {
182 case JIS_ESC_CHAR:
183 ch = ESCAPE;
184 break;
185 case '$':
186 ch = DOLLAR;
187 break;
188 case '@':
189 ch = AT;
190 break;
191 case '(':
192 ch = BRACKET;
193 break;
194 case 'B':
195 ch = B;
196 break;
197 case 'J':
198 ch = J;
199 break;
200 case '\0':
201 ch = NUL;
202 break;
203 default:
204 if (ISJIS (curr_ch))
205 ch = JIS_CHAR;
206 else
207 ch = OTHER;
208 }
209
210 action = JIS_action_table[curr_state][ch];
211 curr_state = JIS_state_table[curr_state][ch];
212
213 switch (action)
214 {
215 case NOOP:
216 break;
217 case EMPTY:
218 if (pwc != NULL)
219 *pwc = (wchar_t)0;
220 save_state = curr_state;
221 return i;
222 case COPYA:
223 if (pwc != NULL)
224 *pwc = (wchar_t)*ptr;
225 save_state = curr_state;
226 return (i + 1);
227 case COPYJ:
228 if (pwc != NULL)
229 *pwc = (((wchar_t)*ptr) << 8) + (wchar_t)(*(ptr+1));
230 save_state = curr_state;
231 return (i + 1);
232 case COPYJ2:
233 if (pwc != NULL)
234 *pwc = (((wchar_t)*ptr) << 8) + (wchar_t)(*(ptr+1));
235 save_state = curr_state;
236 return (ptr - t) + 2;
237 case MAKE_A:
238 case MAKE_J:
239 ptr = (char *)(t + i + 1);
240 break;
241 case ERROR:
242 default:
243 return -1;
244 }
245 }
246
247 return -1; /* n < bytes needed */
248 }
249
250#ifdef CROSS_COMPILE
251 if (s == NULL)
252 return 0; /* not state-dependent */
253 if (pwc != NULL)
254 *pwc = *s;
255 return 1;
256#else
257 /* This must be the "C" locale or unknown locale. */
258 return mbtowc (pwc, s, n);
259#endif
260}
261
/* Return the length in bytes of the multibyte character at S, examining at
   most N bytes.  This is local_mbtowc with the wide-character result
   discarded, so the return value follows the same conventions.  */
int
local_mblen (s, n)
     const char *s;
     size_t n;
{
  int length = local_mbtowc (NULL, s, n);

  return length;
}
269
270int
271local_mb_cur_max ()
272{
273 if (literal_codeset == NULL || strlen (literal_codeset) <= 1)
274 ;
275 else if (! strcmp (literal_codeset, "C-SJIS"))
276 return 2;
277 else if (! strcmp (literal_codeset, "C-EUCJP"))
278 return 2;
279 else if (! strcmp (literal_codeset, "C-JIS"))
280 return 8; /* 3 + 2 + 3 */
281
282#ifdef CROSS_COMPILE
283 return 1;
284#else
285 return MB_CUR_MAX;
286#endif
287}
288#endif /* MULTIBYTE_CHARS */
This page took 0.051948 seconds and 5 git commands to generate.