]>
Commit | Line | Data |
---|---|---|
a3f9f006 ML |
1 | #!/usr/bin/env python3 |
2 | # | |
aaae53ce RI |
3 | # Check gcc.pot file for stylistic issues as described in |
4 | # https://gcc.gnu.org/onlinedocs/gccint/Guidelines-for-Diagnostics.html, | |
5 | # especially in gcc-internal-format messages. | |
a3f9f006 ML |
6 | # |
7 | # This file is part of GCC. | |
8 | # | |
9 | # GCC is free software; you can redistribute it and/or modify it under | |
10 | # the terms of the GNU General Public License as published by the Free | |
11 | # Software Foundation; either version 3, or (at your option) any later | |
12 | # version. | |
13 | # | |
14 | # GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
15 | # WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
16 | # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
17 | # for more details. | |
18 | # | |
19 | # You should have received a copy of the GNU General Public License | |
20 | # along with GCC; see the file COPYING3. If not see | |
aaae53ce | 21 | # <http://www.gnu.org/licenses/>. |
a3f9f006 ML |
22 | |
23 | import argparse | |
24 | import re | |
aaae53ce RI |
25 | from collections import Counter |
26 | from typing import Dict, Match | |
27 | ||
28 | import polib | |
29 | ||
# Number of times each diagnostic text has been reported.  warn() adds
# to it and main() prints it as the final summary.
seen_warnings = Counter()
31 | ||
32 | ||
def location(msg: polib.POEntry):
    """
    Returns the first occurrence of the message in the form
    "file:line", or a fixed marker if the message has no occurrences.
    """

    occurrences = msg.occurrences
    if not occurrences:
        return '<unknown location>'

    # Only the first occurrence is reported, further ones are ignored.
    first = occurrences[0]
    return f'{first[0]}:{first[1]}'
38 | ||
39 | ||
def warn(msg: polib.POEntry,
         diagnostic_id: str, diagnostic: str, include_msgid=True):
    """
    Prints a diagnostic for the message, prefixed by its location, and
    counts it in seen_warnings under the diagnostic text.

    If include_msgid is true, the repr of the msgid is appended to the
    printed line.

    To suppress a warning for a particular message,
    add a line "#, gcclint:ignore:{diagnostic_id}" to the message.
    """

    suppression_flag = f'gcclint:ignore:{diagnostic_id}'
    if suppression_flag in msg.flags:
        return

    # The counter is keyed by the text, not by the id.
    seen_warnings[diagnostic] += 1

    line = (f'{location(msg)}: {diagnostic} in {repr(msg.msgid)}'
            if include_msgid
            else f'{location(msg)}: {diagnostic}')
    print(line)
56 | ||
57 | ||
def lint_gcc_internal_format(msg: polib.POEntry):
    """
    Checks a single message that has the gcc-internal-format.  These
    messages use a variety of placeholders like %qs, %<quotes%> and
    %q#E.

    Each finding is reported through warn(), so it can be suppressed
    for a single message with a "gcclint:ignore:<diagnostic_id>" flag.
    """

    msgid: str = msg.msgid

    def outside_quotes(m: Match[str]):
        """
        Tells whether the match starts outside of %<quotes%>, judged by
        whether as many %< as %> precede the start of the match.
        """

        before = msgid[:m.start(0)]
        return before.count('%<') == before.count('%>')

    def lint_matching_placeholders():
        """
        Warns when literal values in placeholders are not exactly equal
        in the translation.  This can happen when doing copy-and-paste
        translations of similar messages.

        To avoid these mismatches in the first place,
        structurally equal messages are found by
        lint_diagnostics_differing_only_in_placeholders.

        This check only applies when checking a finished translation
        such as de.po, not gcc.pot.
        """

        if not msg.translated():
            return

        in_msgid = re.findall('%<[^%]+%>', msgid)
        in_msgstr = re.findall('%<[^%]+%>', msg.msgstr)

        # Compared as sets: order and repetition are not checked.
        if set(in_msgid) != set(in_msgstr):
            warn(msg,
                 'placeholder-mismatch',
                 f'placeholder mismatch: msgid has {in_msgid}, '
                 f'msgstr has {in_msgstr}',
                 include_msgid=False)

    def lint_option_outside_quotes():
        """
        Warns about whitespace-separated words outside of quotes that
        look like a command line option (a dash followed by a letter)
        or that start with __builtin_.
        """

        for match in re.finditer(r'\S+', msgid):
            part = match.group()
            if not outside_quotes(match):
                continue

            if part.startswith('-'):
                if len(part) >= 2 and part[1].isalpha():
                    # -INF looks like an option but is exempt.
                    if part == '-INF':
                        continue

                    warn(msg,
                         'option-outside-quotes',
                         'command line option outside %<quotes%>')

            if part.startswith('__builtin_'):
                warn(msg,
                     'builtin-outside-quotes',
                     'builtin function outside %<quotes%>')

    def lint_plain_apostrophe():
        """
        Warns about each apostrophe outside of quotes that is not
        directly preceded by a percent sign.
        """

        # The lookbehind keeps the match on the apostrophe itself, so
        # that outside_quotes judges the position of the apostrophe and
        # not of the character before it.  Matching that character as
        # well would classify an apostrophe directly after %< or %> on
        # the wrong side of the quote marker, and would miss one at the
        # very start of the message or directly after another one.
        for match in re.finditer(r"(?<!%)'", msgid):
            if outside_quotes(match):
                warn(msg, 'apostrophe', 'apostrophe without leading %')

    def lint_space_before_quote():
        """
        A missing space before %< is often the result of string
        literals that are joined by the C compiler and neither literal
        has a space to separate the words.
        """

        for match in re.finditer('(.?[a-zA-Z0-9])%<', msgid):
            # A %s placeholder directly before the quote is accepted.
            if match.group(1) != '%s':
                warn(msg,
                     'no-space-before-quote',
                     '%< directly following a letter or digit')

    def lint_underscore_outside_quotes():
        """
        An underscore outside of quotes is used in several contexts,
        and many of them violate the GCC Guidelines for Diagnostics:

        * names of GCC-internal compiler functions
        * names of GCC-internal data structures
        * static_cast and the like (which are legitimate)
        """

        for match in re.finditer('_', msgid):
            if outside_quotes(match):
                warn(msg,
                     'underscore-outside-quotes',
                     'underscore outside of %<quotes%>')
                # One warning per message is enough.
                return

    def lint_may_not():
        """
        The term "may not" may either mean "it could be the case"
        or "should not".  These two different meanings are sometimes
        hard to tell apart.
        """

        if re.search(r'\bmay not\b', msgid):
            warn(msg,
                 'ambiguous-may-not',
                 'the term "may not" is ambiguous')

    def lint_unbalanced_quotes():
        """
        Warns when the number of %< differs from the number of %>,
        in the msgid and, for translated messages, in the msgstr.
        """

        if msgid.count('%<') != msgid.count('%>'):
            warn(msg,
                 'unbalanced-quotes',
                 'unbalanced %< and %> quotes')

        if msg.translated():
            if msg.msgstr.count('%<') != msg.msgstr.count('%>'):
                warn(msg,
                     'unbalanced-quotes',
                     'unbalanced %< and %> quotes')

    def lint_single_space_after_sentence():
        """
        After a sentence there should be two spaces.
        """

        if re.search(r'[.] [A-Z]', msgid):
            warn(msg,
                 'single-space-after-sentence',
                 'single space after sentence')

    def lint_non_canonical_quotes():
        """
        Catches %<%s%> as well as %s in plain single, double or
        `GNU-style' quotes, which can be written in the shorter
        form %qs.  Only the first such placeholder is reported.
        """

        match = re.search("%<%s%>|'%s'|\"%s\"|`%s'", msgid)
        if match:
            warn(msg,
                 'non-canonical-quotes',
                 f'placeholder {match.group()} should be written as %qs')

    lint_option_outside_quotes()
    lint_plain_apostrophe()
    lint_space_before_quote()
    lint_underscore_outside_quotes()
    lint_may_not()
    lint_unbalanced_quotes()
    lint_matching_placeholders()
    lint_single_space_after_sentence()
    lint_non_canonical_quotes()
206 | ||
207 | ||
def lint_diagnostics_differing_only_in_placeholders(po: polib.POFile):
    """
    Finds messages that have the same structure and only differ in the
    plain strings they put inside %<quotes%>.  Merging such messages
    prevents copy-and-paste mistakes by the translators.

    For the comparison, every %<...%> group is replaced with %qs.  The
    first message of each structure is remembered, every later one is
    reported together with the location of the first.

    See bug 90119.
    """

    quoted = re.compile('%<[^%]+%>')
    first_by_pattern: Dict[str, polib.POEntry] = {}

    for entry in po:
        entry: polib.POEntry
        text = entry.msgid
        pattern = quoted.sub('%qs', text)

        if pattern in first_by_pattern:
            first = first_by_pattern[pattern]
            warn(entry,
                 'same-pattern',
                 f'same pattern for {repr(text)} and '
                 f'{repr(first.msgid)} in {location(first)}',
                 include_msgid=False)
        else:
            # The unmodified msgid is registered as well, so that it
            # also counts as a pattern for the following messages.
            first_by_pattern[pattern] = entry
            first_by_pattern[text] = entry
236 | ||
237 | ||
def lint_file(po: polib.POFile):
    """
    Runs the per-message checks on every message flagged as
    gcc-internal-format that is neither obsolete nor fuzzy, and then
    the check that compares all messages of the file with each other.
    """

    for entry in po:
        entry: polib.POEntry

        if entry.obsolete or entry.fuzzy:
            continue
        if 'gcc-internal-format' not in entry.flags:
            continue

        lint_gcc_internal_format(entry)

    lint_diagnostics_differing_only_in_placeholders(po)
247 | ||
248 | ||
def main():
    """
    Lints the po/pot file named on the command line and then prints a
    summary of all diagnostics that were reported more than once.
    """

    parser = argparse.ArgumentParser(description='')
    parser.add_argument('file', help='pot file')
    options = parser.parse_args()

    lint_file(polib.pofile(options.file))

    print()
    print('summary:')
    # most_common() lists the most frequent diagnostics first.
    for diagnostic, count in seen_warnings.most_common():
        if count > 1:
            print(f'{count}\t{diagnostic}')
263 | ||
a3f9f006 | 264 | |
aaae53ce RI |
# Run the linter only when this file is executed as a script.
if __name__ == '__main__':
    main()