]> gcc.gnu.org Git - gcc.git/blame - gcc/mbchar.c
* cpp.texi: Fix some typos.
[gcc.git] / gcc / mbchar.c
CommitLineData
56f48ce9
DB
1/* Multibyte Character Functions.
2 Copyright (C) 1998 Free Software Foundation, Inc.
3
4This file is part of GNU CC.
5
6GNU CC is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
8the Free Software Foundation; either version 2, or (at your option)
9any later version.
10
11GNU CC is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU CC; see the file COPYING. If not, write to
18the Free Software Foundation, 59 Temple Place - Suite 330,
19Boston, MA 02111-1307, USA. */
20
21/* These functions are used to manipulate multibyte characters. */
22
23/* Note regarding cross compilation:
24
25 In general translation of multibyte characters to wide characters can
26 only work in a native compiler since the translation function (mbtowc)
27 needs to know about both the source and target character encoding. However,
28 this particular implementation for JIS, SJIS and EUCJP source characters
29 will work for any compiler with a newlib target. Other targets may also
30 work provided that their wchar_t implementation is 2 bytes and the encoding
31 leaves the source character values unchanged (except for removing the
32 state shifting markers). */
33
34#ifdef MULTIBYTE_CHARS
35#include "config.h"
36#include "system.h"
56f48ce9
DB
37#include "mbchar.h"
38#include <locale.h>
39
/* Classification of one input byte while decoding "C-JIS" text.  The
   values are used as the column index of the JIS state/action tables.  */
typedef enum
{
  ESCAPE,	/* JIS_ESC_CHAR, the byte that starts a shift sequence */
  DOLLAR,	/* '$' */
  BRACKET,	/* '(' */
  AT,		/* '@' */
  B,		/* 'B' */
  J,		/* 'J' */
  NUL,		/* '\0' */
  JIS_CHAR,	/* any other byte accepted by ISJIS */
  OTHER,	/* everything else */
  JIS_C_NUM	/* number of classes above; not a class itself */
} JIS_CHAR_TYPE;
44
/* States of the "C-JIS" decoder.  The values are used as the row index of
   the JIS state/action tables.  */
typedef enum
{
  ASCII,	/* single-byte mode; also the initial state */
  A_ESC,	/* single-byte mode, ESC seen */
  A_ESC_DL,	/* single-byte mode, ESC '$' seen */
  JIS,		/* two-byte mode, at a character boundary */
  JIS_1,	/* two-byte mode, first byte of a character seen */
  JIS_2,	/* two-byte mode, both bytes of a character seen */
  J_ESC,	/* two-byte mode, ESC seen with no character pending */
  J_ESC_BR,	/* two-byte mode, ESC '(' seen with no character pending */
  J2_ESC,	/* ESC seen right after a complete two-byte character */
  J2_ESC_BR,	/* ESC '(' seen right after a complete two-byte character */
  INV,		/* invalid sequence */
  JIS_S_NUM	/* number of states above; not a state itself */
} JIS_STATE;
50
/* What the "C-JIS" decoder does after classifying a byte.  "ptr" below is
   the decoder's pointer to the first byte of the character being built.  */
typedef enum
{
  COPYA,	/* store the single byte at ptr; finish after the current byte */
  COPYJ,	/* store the two bytes at ptr; finish after the current byte */
  COPYJ2,	/* store the two bytes at ptr; finish right after those bytes */
  MAKE_A,	/* restart ptr just past the current byte */
  MAKE_J,	/* restart ptr just past the current byte */
  NOOP,		/* nothing to do; keep scanning */
  EMPTY,	/* store a zero wide character and finish */
  ERROR		/* invalid input; fail */
} JIS_ACTION;
55
/*****************************************************************************
 * state/action tables for processing JIS encoding
 * Where possible, switches to JIS are grouped with the following JIS
 * characters and switches to ASCII are grouped with preceding JIS characters.
 * Thus, maximum returned length is:
 * 3 (switch to JIS) + 2 (JIS characters) + 3 (switch back to ASCII) = 8.
 *****************************************************************************/
63static JIS_STATE JIS_state_table[JIS_S_NUM][JIS_C_NUM] = {
64/* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER*/
65/*ASCII*/ { A_ESC, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,ASCII,ASCII},
66/*A_ESC*/ { ASCII, A_ESC_DL,ASCII, ASCII, ASCII, ASCII, ASCII,ASCII,ASCII},
67/*A_ESC_DL*/{ ASCII, ASCII, ASCII, JIS, JIS, ASCII, ASCII,ASCII,ASCII},
68/*JIS*/ { J_ESC, JIS_1, JIS_1, JIS_1, JIS_1, JIS_1, INV, JIS_1,INV },
69/*JIS_1*/ { INV, JIS_2, JIS_2, JIS_2, JIS_2, JIS_2, INV, JIS_2,INV },
70/*JIS_2*/ { J2_ESC,JIS, JIS, JIS, JIS, JIS, INV, JIS, JIS },
71/*J_ESC*/ { INV, INV, J_ESC_BR, INV, INV, INV, INV, INV, INV },
72/*J_ESC_BR*/{ INV, INV, INV, INV, ASCII, ASCII, INV, INV, INV },
73/*J2_ESC*/ { INV, INV, J2_ESC_BR,INV, INV, INV, INV, INV, INV },
74/*J2_ESC_BR*/{INV, INV, INV, INV, ASCII, ASCII, INV, INV, INV },
75};
76
77static JIS_ACTION JIS_action_table[JIS_S_NUM][JIS_C_NUM] = {
78/* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */
79/*ASCII */ {NOOP, COPYA, COPYA, COPYA, COPYA, COPYA, EMPTY, COPYA, COPYA},
80/*A_ESC */ {COPYA, NOOP, COPYA, COPYA, COPYA, COPYA, COPYA, COPYA, COPYA},
81/*A_ESC_DL */{COPYA, COPYA, COPYA, MAKE_J, MAKE_J, COPYA, COPYA, COPYA, COPYA},
82/*JIS */ {NOOP, NOOP, NOOP, NOOP, NOOP, NOOP, ERROR, NOOP, ERROR },
83/*JIS_1 */ {ERROR, NOOP, NOOP, NOOP, NOOP, NOOP, ERROR, NOOP, ERROR },
84/*JIS_2 */ {NOOP, COPYJ2,COPYJ2,COPYJ2, COPYJ2, COPYJ2,ERROR, COPYJ2,COPYJ2},
85/*J_ESC */ {ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR },
86/*J_ESC_BR */{ERROR, ERROR, ERROR, ERROR, NOOP, NOOP, ERROR, ERROR, ERROR },
87/*J2_ESC */ {ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR },
88/*J2_ESC_BR*/{ERROR, ERROR, ERROR, ERROR, COPYJ, COPYJ, ERROR, ERROR, ERROR },
89};
90
91
/* Name of the character encoding consulted by local_mbtowc and
   local_mb_cur_max.  NULL, or a string of length <= 1, selects the default
   handling; "C-SJIS", "C-EUCJP" and "C-JIS" select the decoders built into
   this file.  */
char *literal_codeset = NULL;
93
94int
95local_mbtowc (pwc, s, n)
96 wchar_t *pwc;
97 const char *s;
98 size_t n;
99{
100 static JIS_STATE save_state = ASCII;
101 JIS_STATE curr_state = save_state;
102 unsigned char *t = (unsigned char *)s;
103
104 if (s != NULL && n == 0)
105 return -1;
106
107 if (literal_codeset == NULL || strlen (literal_codeset) <= 1)
108 {
109 /* This must be the "C" locale or unknown locale -- fall thru */
110 }
111 else if (! strcmp (literal_codeset, "C-SJIS"))
112 {
113 int char1;
114 if (s == NULL)
115 return 0; /* not state-dependent */
116 char1 = *t;
117 if (ISSJIS1 (char1))
118 {
119 int char2 = t[1];
120 if (n <= 1)
121 return -1;
122 if (ISSJIS2 (char2))
123 {
124 if (pwc != NULL)
125 *pwc = (((wchar_t)*t) << 8) + (wchar_t)(*(t+1));
126 return 2;
127 }
128 return -1;
129 }
130 if (pwc != NULL)
131 *pwc = (wchar_t)*t;
132 if (*t == '\0')
133 return 0;
134 return 1;
135 }
136 else if (! strcmp (literal_codeset, "C-EUCJP"))
137 {
138 int char1;
139 if (s == NULL)
140 return 0; /* not state-dependent */
141 char1 = *t;
142 if (ISEUCJP (char1))
143 {
144 int char2 = t[1];
145 if (n <= 1)
146 return -1;
147 if (ISEUCJP (char2))
148 {
149 if (pwc != NULL)
150 *pwc = (((wchar_t)*t) << 8) + (wchar_t)(*(t+1));
151 return 2;
152 }
153 return -1;
154 }
155 if (pwc != NULL)
156 *pwc = (wchar_t)*t;
157 if (*t == '\0')
158 return 0;
159 return 1;
160 }
161 else if (! strcmp (literal_codeset, "C-JIS"))
162 {
163 JIS_ACTION action;
164 JIS_CHAR_TYPE ch;
165 unsigned char *ptr;
166 int i, curr_ch;
167
168 if (s == NULL)
169 {
170 save_state = ASCII;
171 return 1; /* state-dependent */
172 }
173
174 ptr = t;
175
176 for (i = 0; i < n; ++i)
177 {
178 curr_ch = t[i];
179 switch (curr_ch)
180 {
181 case JIS_ESC_CHAR:
182 ch = ESCAPE;
183 break;
184 case '$':
185 ch = DOLLAR;
186 break;
187 case '@':
188 ch = AT;
189 break;
190 case '(':
191 ch = BRACKET;
192 break;
193 case 'B':
194 ch = B;
195 break;
196 case 'J':
197 ch = J;
198 break;
199 case '\0':
200 ch = NUL;
201 break;
202 default:
203 if (ISJIS (curr_ch))
204 ch = JIS_CHAR;
205 else
206 ch = OTHER;
207 }
208
209 action = JIS_action_table[curr_state][ch];
210 curr_state = JIS_state_table[curr_state][ch];
211
212 switch (action)
213 {
214 case NOOP:
215 break;
216 case EMPTY:
217 if (pwc != NULL)
218 *pwc = (wchar_t)0;
219 save_state = curr_state;
220 return i;
221 case COPYA:
222 if (pwc != NULL)
223 *pwc = (wchar_t)*ptr;
224 save_state = curr_state;
225 return (i + 1);
226 case COPYJ:
227 if (pwc != NULL)
228 *pwc = (((wchar_t)*ptr) << 8) + (wchar_t)(*(ptr+1));
229 save_state = curr_state;
230 return (i + 1);
231 case COPYJ2:
232 if (pwc != NULL)
233 *pwc = (((wchar_t)*ptr) << 8) + (wchar_t)(*(ptr+1));
234 save_state = curr_state;
235 return (ptr - t) + 2;
236 case MAKE_A:
237 case MAKE_J:
238 ptr = (char *)(t + i + 1);
239 break;
240 case ERROR:
241 default:
242 return -1;
243 }
244 }
245
246 return -1; /* n < bytes needed */
247 }
248
249#ifdef CROSS_COMPILE
250 if (s == NULL)
251 return 0; /* not state-dependent */
252 if (pwc != NULL)
253 *pwc = *s;
254 return 1;
255#else
256 /* This must be the "C" locale or unknown locale. */
257 return mbtowc (pwc, s, n);
258#endif
259}
260
/* Return the length in bytes of the multibyte character starting at S,
   examining at most N bytes.  This is local_mbtowc with the wide-character
   result discarded, so the return values (including those for a NULL S)
   are exactly those of local_mbtowc.  */
int
local_mblen (s, n)
     const char *s;
     size_t n;
{
  int length;

  length = local_mbtowc (NULL, s, n);
  return length;
}
268
269int
270local_mb_cur_max ()
271{
272 if (literal_codeset == NULL || strlen (literal_codeset) <= 1)
273 ;
274 else if (! strcmp (literal_codeset, "C-SJIS"))
275 return 2;
276 else if (! strcmp (literal_codeset, "C-EUCJP"))
277 return 2;
278 else if (! strcmp (literal_codeset, "C-JIS"))
279 return 8; /* 3 + 2 + 3 */
280
281#ifdef CROSS_COMPILE
282 return 1;
283#else
4d2a3f76
DB
284 if (MB_CUR_MAX > 0)
285 return MB_CUR_MAX;
286
287 return 1; /* default */
56f48ce9
DB
288#endif
289}
290#endif /* MULTIBYTE_CHARS */
This page took 0.236094 seconds and 5 git commands to generate.