]> gcc.gnu.org Git - gcc.git/blame - contrib/update-copyright.pl
Add contrib/update-copyright.pl.
[gcc.git] / contrib / update-copyright.pl
CommitLineData
240d6348
RS
1#!/usr/bin/python
2#
3# Copyright (C) 2013 Free Software Foundation, Inc.
4#
5# This script is free software; you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation; either version 3, or (at your option)
8# any later version.
9
10# This script adjusts the copyright notices at the top of source files
11# so that they have the form:
12#
13# Copyright XXXX-YYYY Free Software Foundation, Inc.
14#
15# It doesn't change code that is known to be maintained elsewhere or
16# that carries a non-FSF copyright.
17#
18# The script also doesn't change testsuite files, except those in
19# libstdc++-v3. This is because libstdc++-v3 has a conformance testsuite,
20# while most tests in other directories are just things that failed at some
21# point in the past.
22#
23# Pass --this-year to the script if you want it to add the current year
24# to all applicable notices. Pass --quilt if you are using quilt and
25# want files to be added to the quilt before being changed.
26#
27# By default the script will update all directories for which the
28# output has been vetted. You can instead pass the names of individual
29# directories, including those that haven't been approved. So:
30#
31# update-copyright.pl --this-year
32#
33# is the command that would be used at the beginning of a year to update
34# all copyright notices (and possibly at other times to check whether
35# new files have been added with old years). On the other hand:
36#
37# update-copyright.pl --this-year libjava
38#
39# would run the script on just libjava/.
40#
41# Note that things like --version output strings must be updated before
42# this script is run. There's already a separate procedure for that.
43
44import os
45import re
46import sys
47import time
48import subprocess
49
class Errors:
    """Echo error messages to standard error and count them."""

    def __init__(self):
        # Number of messages passed to report() so far.
        self.num_errors = 0

    def report(self, filename, string):
        """Write STRING to stderr and bump the error count.

        When FILENAME is truthy the message is prefixed with
        'FILENAME: '; otherwise STRING is written on its own.
        """
        message = (filename + ': ' + string) if filename else string
        sys.stderr.write(message + '\n')
        self.num_errors += 1

    def ok(self):
        """Return True if report() has never been called."""
        return not self.num_errors
62
class GenericFilter:
    """Default policy for which files and directories are left alone.

    Subclasses extend the five sets created here (or override the
    query methods) to describe one particular directory tree.
    """

    def __init__(self):
        self.skip_dirs = set()
        self.skip_extensions = set()
        self.fossilised_files = set()
        self.own_files = set()

        self.skip_files = set((
            # Skip licence files.
            'COPYING',
            'COPYING.LIB',
            'COPYING3',
            'COPYING3.LIB',
            'LICENSE',
            'fdl.texi',
            'gpl_v3.texi',
            'fdl-1.3.xml',
            'gpl-3.0.xml',

            # Skip auto- and libtool-related files
            'aclocal.m4',
            'compile',
            'config.guess',
            'config.sub',
            'depcomp',
            'install-sh',
            'libtool.m4',
            'ltmain.sh',
            'ltoptions.m4',
            'ltsugar.m4',
            'ltversion.m4',
            'lt~obsolete.m4',
            'missing',
            'mkdep',
            'mkinstalldirs',
            'move-if-change',
            'shlibpath.m4',
            'symlink-tree',
            'ylwrap',

            # Skip FSF mission statement, etc.
            'gnu.texi',
            'funding.texi',
            'appendix_free.xml',

            # Skip imported texinfo files.
            'texinfo.tex',
        ))

    def get_line_filter(self, dir, filename):
        """Return a compiled regexp for lines to leave untouched, or None.

        Only files whose name starts with 'ChangeLog' get a filter: lines
        that begin with a tab are changelog entries, and references to
        copyright inside them are ignored.
        """
        if not filename.startswith('ChangeLog'):
            return None
        return re.compile('\t')

    def skip_file(self, dir, filename):
        """Return True if FILENAME in DIR must not be processed at all."""
        base, extension = os.path.splitext(os.path.join(dir, filename))
        if filename in self.skip_files or extension in self.skip_extensions:
            return True

        if extension == '.in':
            # Skip .in files produced by automake (a sibling .am exists)
            # or by autogen (sibling .def and .tpl both exist).
            if (os.path.exists(base + '.am')
                    or (os.path.exists(base + '.def')
                        and os.path.exists(base + '.tpl'))):
                return True

        if filename == 'configure':
            # Skip configure files produced by autoconf.
            return any(os.path.exists(base + suffix)
                       for suffix in ('.ac', '.in'))

        return False

    def skip_dir(self, dir, subdir):
        """Return True if SUBDIR of DIR should not be descended into."""
        return subdir in self.skip_dirs

    def is_fossilised_file(self, dir, filename):
        """Return True if FILENAME is historical and keeps its old years."""
        if filename in self.fossilised_files:
            return True
        # Only touch the current ChangeLog: any other name that contains
        # 'ChangeLog' is treated as fossilised.
        return filename != 'ChangeLog' and 'ChangeLog' in filename

    def by_package_author(self, dir, filename):
        """Return True if FILENAME is listed in own_files."""
        return filename in self.own_files
161
class Copyright:
    """Find copyright notices in files and rewrite them in canonical form.

    A notice is rewritten only when its holder has been registered with
    add_package_author(); holders registered with add_external_author()
    are recognised but left untouched, and unknown holders are reported
    through the Errors-like object passed to the constructor.
    """

    class BadYear(Exception):
        """Raised by parse_year() for a year it cannot interpret.

        Deriving from Exception is required for the class to be usable
        with 'raise' on Python 3.
        """

        def __init__(self, year):
            Exception.__init__(self, year)
            self.year = year

        def __str__(self):
            return 'unrecognised year: ' + self.year

    def __init__(self, errors):
        """Compile the notice regexps; ERRORS receives every diagnostic."""
        self.errors = errors

        # Characters in a range of years.  Include '.' for typos.
        ranges = r'[0-9](?:[-0-9.,\s]|\s+and\s+)*[0-9]'

        # Non-whitespace characters in a copyright holder's name.
        name = r'[\w.,-]'

        # Matches one year.
        self.year_re = re.compile('[0-9]+')

        # Matches part of a year or copyright holder.
        self.continuation_re = re.compile(ranges + '|' + name)

        # Matches a full copyright notice.  Raw strings keep the regexp
        # escapes (\s, \w, \(...) out of Python's own escape processing.
        # NOTE(review): the literal copyright sign in the fourth
        # alternative may be a decoded HTML entity from the web view this
        # text was taken from -- confirm against the repository copy.
        self.copyright_re = re.compile(
            # 1: 'Copyright (C)', etc.
            r'([Cc]opyright'
            r'|[Cc]opyright\s+\([Cc]\)'
            r'|[Cc]opyright\s+%s'
            r'|[Cc]opyright\s+©'
            r'|[Cc]opyright\s+@copyright{}'
            r'|@set\s+copyright[\w-]+)'

            # 2: the years.  Include the whitespace in the year, so that
            #    we can remove any excess.
            r'(\s*(?:' + ranges + r',?'
            r'|@value\{[^{}]*\})\s*)'

            # 3: 'by ', if used
            r'(by\s+)?'

            # 4: the copyright holder.  Don't allow multiple consecutive
            #    spaces, so that right-margin gloss doesn't get caught
            #    (e.g. gnat_ugn.texi).
            r'(' + name + r'(?:\s?' + name + r')*)?')

        # A regexp for notices that might have slipped by.  Just matching
        # 'copyright' is too noisy, and 'copyright.*[0-9]' falls foul of
        # HTML header markers, so check for 'copyright' and two digits.
        self.other_copyright_re = re.compile('copyright.*[0-9][0-9]',
                                             re.IGNORECASE)
        # Comment leaders stripped from continuation lines.
        self.comment_re = re.compile('#+|[*]+|;+|%+|//+|@c |dnl ')
        # Maps each known holder spelling to its canonical form, or to
        # None for holders whose notices must not be changed.
        self.holders = {'@copying': '@copying'}
        # Proper word-prefixes of package-author names; a holder equal to
        # one of these probably continues on the next line.
        self.holder_prefixes = set()

        # True to 'quilt add' files before changing them.
        self.use_quilt = False

        # If set, force all notices to include this year.
        self.max_year = None

        # Goes after the year(s).  Could be ', '.
        self.separator = ' '

    def add_package_author(self, holder, canon_form=None):
        """Register HOLDER as a spelling of the package author.

        Notices naming HOLDER are rewritten to use CANON_FORM, which
        defaults to HOLDER itself.
        """
        if not canon_form:
            canon_form = holder
        self.holders[holder] = canon_form
        # Record every prefix that ends at a space, so that a notice cut
        # off at a line break is recognised as incomplete.
        index = holder.find(' ')
        while index >= 0:
            self.holder_prefixes.add(holder[:index])
            index = holder.find(' ', index + 1)

    def add_external_author(self, holder):
        """Register HOLDER as a known holder whose notices are not changed."""
        self.holders[holder] = None

    def parse_year(self, string):
        """Convert the digit string STRING to a four-digit year.

        Two-digit years above 70 are taken to be 19xx.  Any other length,
        or a two-digit year of 70 or below, raises BadYear.
        """
        year = int(string)
        if len(string) == 2:
            if year > 70:
                return year + 1900
        elif len(string) == 4:
            return year
        raise self.BadYear(string)

    def year_range(self, years):
        """Return (earliest, latest) over every year mentioned in YEARS."""
        year_list = [self.parse_year(year)
                     for year in self.year_re.findall(years)]
        assert len(year_list) > 0
        return (min(year_list), max(year_list))

    def set_use_quilt(self, use_quilt):
        """Choose whether changed files are 'quilt add'ed first."""
        self.use_quilt = use_quilt

    def include_year(self, year):
        """Force every updated notice to extend to at least YEAR."""
        assert not self.max_year
        self.max_year = year

    def canonicalise_years(self, dir, filename, filter, years):
        """Return YEARS rewritten as 'YYYY' or 'YYYY-YYYY'.

        FILTER is consulted only when a forced year is set; fossilised
        files keep their existing upper bound.  May raise BadYear.
        """
        # Leave texinfo variables alone.
        if years.startswith('@value'):
            return years

        (min_year, max_year) = self.year_range(years)

        # Update the upper bound, if enabled.
        if self.max_year and not filter.is_fossilised_file(dir, filename):
            max_year = max(max_year, self.max_year)

        # Use a range.
        if min_year == max_year:
            return '%d' % min_year
        return '%d-%d' % (min_year, max_year)

    def strip_continuation(self, line):
        """Return LINE without leading whitespace and comment leader."""
        line = line.lstrip()
        match = self.comment_re.match(line)
        if match:
            line = line[match.end():].lstrip()
        return line

    def is_complete(self, match):
        """Return a true value if MATCH already contains a whole holder.

        A holder that is only a prefix of a package-author name (and not
        itself a registered holder) is treated as incomplete.
        """
        holder = match.group(4)
        return (holder
                and (holder not in self.holder_prefixes
                     or holder in self.holders))

    def update_copyright(self, dir, filename, filter, file, line, match):
        """Canonicalise the notice that MATCH found in LINE.

        FILE is the iterator LINE came from; further lines are pulled
        from it while the notice looks incomplete.  Returns a tuple
        (changed, line, next_line): LINE is the text to emit (the
        original text, including any consumed continuation lines, when
        nothing is rewritten) and NEXT_LINE is a line that was read
        ahead but not consumed, or None.
        """
        orig_line = line
        next_line = None
        pathname = os.path.join(dir, filename)

        intro = match.group(1)
        if intro.startswith('@set'):
            # Texinfo year variables should always be on one line
            after_years = line[match.end(2):].strip()
            if after_years != '':
                self.errors.report(pathname,
                                   'trailing characters in @set: '
                                   + after_years)
                return (False, orig_line, next_line)
        else:
            # If it looks like the copyright is incomplete, add the next line.
            while not self.is_complete(match):
                try:
                    # next() works on Python 2.6+ and 3; file.next() is
                    # Python 2 only.
                    next_line = next(file)
                except StopIteration:
                    break

                # If the next line doesn't look like a proper continuation,
                # assume that what we've got is complete.
                continuation = self.strip_continuation(next_line)
                if not self.continuation_re.match(continuation):
                    break

                # Merge the lines for matching purposes.
                orig_line += next_line
                line = line.rstrip() + ' ' + continuation
                next_line = None

                # Rematch with the longer line, at the original position.
                match = self.copyright_re.match(line, match.start())
                assert match

            holder = match.group(4)

            # Use the filter to test cases where markup is getting in the way.
            if filter.by_package_author(dir, filename):
                assert holder not in self.holders

            elif not holder:
                self.errors.report(pathname, 'missing copyright holder')
                return (False, orig_line, next_line)

            elif holder not in self.holders:
                self.errors.report(pathname,
                                   'unrecognised copyright holder: ' + holder)
                return (False, orig_line, next_line)

            else:
                # See whether the copyright is associated with the package
                # author.
                canon_form = self.holders[holder]
                if not canon_form:
                    return (False, orig_line, next_line)

                # Make sure the author is given in a consistent way.
                # Edits are applied right to left (group 4, 3, 2, 1) so
                # the offsets of the earlier groups stay valid.
                line = (line[:match.start(4)]
                        + canon_form
                        + line[match.end(4):])

                # Remove any 'by'
                if match.group(3):
                    line = line[:match.start(3)] + line[match.end(3):]

        # Update the copyright years.
        years = match.group(2).strip()
        try:
            canon_form = self.canonicalise_years(dir, filename, filter, years)
        except self.BadYear as e:
            self.errors.report(pathname, str(e))
            return (False, orig_line, next_line)

        line = (line[:match.start(2)]
                + ' ' + canon_form + self.separator
                + line[match.end(2):])

        # Use the standard (C) form.
        if intro.endswith('right'):
            intro += ' (C)'
        elif intro.endswith('(c)'):
            intro = intro[:-3] + '(C)'
        line = line[:match.start(1)] + intro + line[match.end(1):]

        # Strip trailing whitespace
        line = line.rstrip() + '\n'

        return (line != orig_line, line, next_line)

    def process_file(self, dir, filename, filter):
        """Update every notice in DIR/FILENAME, rewriting the file if needed.

        Leftover '*.tmp' files are deleted instead of being processed.
        The file is rewritten only when at least one line changed and no
        error has been reported so far.
        """
        pathname = os.path.join(dir, filename)
        if filename.endswith('.tmp'):
            # Looks like something we tried to create before.
            try:
                os.remove(pathname)
            except OSError:
                # Best effort only: a stale file that cannot be removed
                # is not an error.
                pass
            return

        lines = []
        changed = False
        line_filter = filter.get_line_filter(dir, filename)
        with open(pathname, 'r') as file:
            for line in file:
                # update_copyright may hand back a line it read ahead but
                # did not use; loop until that has been handled too.
                while line:
                    next_line = None
                    # Leave filtered-out lines alone.
                    if not (line_filter and line_filter.match(line)):
                        match = self.copyright_re.search(line)
                        if match:
                            res = self.update_copyright(dir, filename, filter,
                                                        file, line, match)
                            (this_changed, line, next_line) = res
                            changed = changed or this_changed

                        # Check for copyright lines that might have slipped by.
                        elif self.other_copyright_re.search(line):
                            self.errors.report(pathname,
                                               'unrecognised copyright: %s'
                                               % line.strip())
                    lines.append(line)
                    line = next_line

        # If something changed, write the new file out.
        if changed and self.errors.ok():
            tmp_pathname = pathname + '.tmp'
            with open(tmp_pathname, 'w') as file:
                file.writelines(lines)
            if self.use_quilt:
                subprocess.call(['quilt', 'add', pathname])
            os.rename(tmp_pathname, pathname)

    def process_tree(self, tree, filter):
        """Walk TREE, processing each file that FILTER does not skip."""
        for (dir, subdirs, filenames) in os.walk(tree):
            # Don't recurse through directories that should be skipped.
            # The list must be edited in place for os.walk to notice.
            subdirs[:] = [subdir for subdir in subdirs
                          if not filter.skip_dir(dir, subdir)]

            # Handle the files in this directory.
            for filename in filenames:
                if filter.skip_file(dir, filename):
                    sys.stdout.write('Skipping %s\n'
                                     % os.path.join(dir, filename))
                else:
                    self.process_file(dir, filename, filter)
440
class CmdLine:
    """Command-line driver: parses options and runs the updater on trees.

    COPYRIGHT is the class (Copyright or a subclass) that is instantiated
    with this object's Errors instance to do the actual work.
    """

    def __init__(self, copyright=Copyright):
        self.errors = Errors()
        self.copyright = copyright(self.errors)
        # (directory, filter) pairs registered with add_dir, in order.
        self.dirs = []
        # Directories processed when none are named on the command line.
        self.default_dirs = []
        # Directories named on the command line.
        self.chosen_dirs = []
        self.option_handlers = dict()
        self.option_help = []

        self.add_option('--help', 'Print this help', self.o_help)
        self.add_option('--quilt', '"quilt add" files before changing them',
                        self.o_quilt)
        self.add_option('--this-year', 'Add the current year to every notice',
                        self.o_this_year)

    def add_option(self, name, help, handler):
        """Register option NAME with its HELP text and HANDLER callable."""
        self.option_help.append((name, help))
        self.option_handlers[name] = handler

    def add_dir(self, dir, filter=None):
        """Register DIR as a known tree, scanned with FILTER.

        When FILTER is omitted a fresh GenericFilter is created for this
        directory, rather than sharing one instance built when the
        method was defined.
        """
        if filter is None:
            filter = GenericFilter()
        self.dirs.append((dir, filter))

    def o_help(self, option=None):
        """Print usage, the options and the known directories, then exit."""
        sys.stdout.write('Usage: %s [options] dir1 dir2...\n\n'
                         'Options:\n' % sys.argv[0])
        option_format = '%-15s %s\n'
        for (what, help) in self.option_help:
            sys.stdout.write(option_format % (what, help))
        sys.stdout.write('\nDirectories:\n')

        # Directories are listed three to a row.
        dir_format = '%-25s'
        total = len(self.dirs)
        for i, (dir, _filter) in enumerate(self.dirs, 1):
            if i % 3 == 0 or i == total:
                sys.stdout.write(dir + '\n')
            else:
                sys.stdout.write(dir_format % dir)
        sys.exit(0)

    def o_quilt(self, option):
        """Handle --quilt."""
        self.copyright.set_use_quilt(True)

    def o_this_year(self, option):
        """Handle --this-year."""
        self.copyright.include_year(time.localtime().tm_year)

    def main(self):
        """Process sys.argv and exit with status 0 on success, 1 on error."""
        for arg in sys.argv[1:]:
            if arg[:1] != '-':
                self.chosen_dirs.append(arg)
            elif arg in self.option_handlers:
                self.option_handlers[arg](arg)
            else:
                self.errors.report(None, 'unrecognised option: ' + arg)
        if self.errors.ok():
            if len(self.chosen_dirs) == 0:
                self.chosen_dirs = self.default_dirs
            if len(self.chosen_dirs) == 0:
                self.o_help()
            else:
                for chosen_dir in self.chosen_dirs:
                    # A trailing separator makes 'gcc' match 'gcc' and
                    # 'gcc/testsuite' but not, say, 'gccfoo'.
                    canon_dir = os.path.join(chosen_dir, '')
                    count = 0
                    for (dir, filter) in self.dirs:
                        if (dir + os.sep).startswith(canon_dir):
                            count += 1
                            self.copyright.process_tree(dir, filter)
                    if count == 0:
                        self.errors.report(None, 'unrecognised directory: '
                                           + chosen_dir)
        sys.exit(0 if self.errors.ok() else 1)
513
514#----------------------------------------------------------------------------
515
class TopLevelFilter(GenericFilter):
    """Filter for the top-level directory: files only, no recursion."""

    def skip_dir(self, dir, subdir):
        # Every subdirectory is skipped, whatever its name.
        return True
519
class ConfigFilter(GenericFilter):
    """Filter for config/: additionally skips .m4 files from gettext."""

    def skip_file(self, dir, filename):
        """Return True for gettext-imported .m4 files and generic skips."""
        if filename.endswith('.m4'):
            with open(os.path.join(dir, filename)) as file:
                first_line = file.readline()
            # Skip files imported from gettext, recognised by 'gettext-'
            # appearing on their first line.
            if 'gettext-' in first_line:
                return True
        return GenericFilter.skip_file(self, dir, filename)
532
class GCCFilter(GenericFilter):
    """Filter for the gcc/ directory."""

    def __init__(self):
        GenericFilter.__init__(self)

        # Not part of GCC
        self.skip_files.update(('math-68881.h',))

        self.skip_dirs.update((
            # Better not create a merge nightmare for the GNAT folks.
            'ada',

            # Handled separately.
            'testsuite',
        ))

        self.skip_extensions.update((
            # Maintained by the translation project.
            '.po',

            # Automatically-generated.
            '.pot',
        ))

        # Old news won't be updated.
        self.fossilised_files.update(('ONEWS',))
562
class TestsuiteFilter(GenericFilter):
    """Filter for testsuite directories: test sources are left alone."""

    def __init__(self):
        GenericFilter.__init__(self)

        # Don't change the tests, which could be owned by anyone.
        self.skip_extensions.update((
            '.c',
            '.C',
            '.cc',
            '.h',
            '.hs',
            '.f',
            '.f90',
            '.go',
            '.inc',
            '.java',
        ))

    def skip_file(self, dir, filename):
        """Return True for g++.niklas/README and for generic skips."""
        # g++.niklas/README contains historical copyright information
        # and isn't updated.
        in_niklas = os.path.basename(dir) == 'g++.niklas'
        if in_niklas and filename == 'README':
            return True
        return GenericFilter.skip_file(self, dir, filename)
587
class LibCppFilter(GenericFilter):
    """Filter for libcpp: translation files are skipped."""

    def __init__(self):
        GenericFilter.__init__(self)

        self.skip_extensions.update((
            # Maintained by the translation project.
            '.po',

            # Automatically-generated.
            '.pot',
        ))
599
class LibGCCFilter(GenericFilter):
    """Filter for libgcc: the soft-fp subdirectory is skipped."""

    def __init__(self):
        GenericFilter.__init__(self)

        # Imported from GLIBC.
        self.skip_dirs.update(('soft-fp',))
608
class LibJavaFilter(GenericFilter):
    """Filter for libjava."""

    def __init__(self):
        GenericFilter.__init__(self)

        self.skip_dirs.update((
            # Handled separately.
            'testsuite',

            # Not really part of the library
            'contrib',

            # Imported from upstream
            'classpath',
            'libltdl',
        ))

    def get_line_filter(self, dir, filename):
        """Return a regexp for lines to ignore in FILENAME, or defer."""
        # Headers with identifiers that merely contain the word
        # 'copyright'; lines matching the pattern are left alone.
        ignored_lines = {
            'NameDecoder.h': '.*NAME_COPYRIGHT',
            'ICC_Profile.h': '.*icSigCopyrightTag',
        }
        if filename in ignored_lines:
            return re.compile(ignored_lines[filename])
        return GenericFilter.get_line_filter(self, dir, filename)
631
class LibMudflapFilter(GenericFilter):
    """Filter for libmudflap: its testsuite is registered separately."""

    def __init__(self):
        GenericFilter.__init__(self)

        # Handled separately.
        self.skip_dirs.update(('testsuite',))
640
class LibStdCxxFilter(GenericFilter):
    """Filter for libstdc++-v3."""

    def __init__(self):
        GenericFilter.__init__(self)

        self.skip_files |= set([
            # Contains no copyright of its own, but quotes the GPL.
            'intro.xml',
        ])

        self.skip_dirs |= set([
            # Contains automatically-generated sources.
            'html',

            # The testsuite data files shouldn't be changed.
            'data',

            # Contains imported images
            'images',
        ])

        self.own_files |= set([
            # Contains markup around the copyright owner.
            'spine.xml',
        ])

    def get_line_filter(self, dir, filename):
        """Return a regexp for lines to ignore in FILENAME, or defer.

        In boost_concept_check.h, lines starting with the
        '// (C) Copyright Jeremy Siek' notice are left alone.
        """
        if filename == 'boost_concept_check.h':
            # Raw string: '\(' is a regexp escape, not a Python one.
            return re.compile(r'// \(C\) Copyright Jeremy Siek')
        return GenericFilter.get_line_filter(self, dir, filename)
670
class GCCCopyright(Copyright):
    """Copyright updater preloaded with the holders known to GCC."""

    def __init__(self, errors):
        Copyright.__init__(self, errors)

        # Every spelling of the FSF is rewritten to this form.
        canon_fsf = 'Free Software Foundation, Inc.'
        fsf_spellings = (
            'Free Software Foundation',
            'Free Software Foundation.',
            'Free Software Foundation Inc.',
            'Free Software Foundation, Inc',
            'Free Software Foundation, Inc.',
            'The Free Software Foundation',
            'The Free Software Foundation, Inc.',
            'Software Foundation, Inc.',
        )
        for spelling in fsf_spellings:
            self.add_package_author(spelling, canon_fsf)

        # Holders that are recognised but whose notices stay untouched.
        # NOTE(review): 'The Go Authors. All rights reserved.' is listed
        # twice; the two entries may originally have differed in spacing
        # -- confirm against the repository copy.
        external_holders = (
            'ARM',
            'AdaCore',
            'Ami Tavory and Vladimir Dreizin, IBM-HRL.',
            'Cavium Networks.',
            'Faraday Technology Corp.',
            'Florida State University',
            'Greg Colvin and Beman Dawes.',
            'Hewlett-Packard Company',
            'Information Technology Industry Council.',
            'James Theiler, Brian Gough',
            'Makoto Matsumoto and Takuji Nishimura,',
            'National Research Council of Canada.',
            'Peter Dimov and Multi Media Ltd.',
            'Peter Dimov',
            'Pipeline Associates, Inc.',
            'Regents of the University of California.',
            'Silicon Graphics Computer Systems, Inc.',
            'Silicon Graphics',
            'Stephen L. Moshier',
            'Sun Microsystems, Inc. All rights reserved.',
            'The Go Authors. All rights reserved.',
            'The Go Authors. All rights reserved.',
            'The Go Authors.',
            'The Regents of the University of California.',
            'Unicode, Inc.',
            'University of Toronto.',
        )
        for holder in external_holders:
            self.add_external_author(holder)
711
class GCCCmdLine(CmdLine):
    """Command line preconfigured with GCC's directories and filters."""

    def __init__(self):
        CmdLine.__init__(self, GCCCopyright)

        # Known directories in registration order.  None means "use
        # add_dir's default filter".
        known_dirs = [
            ('.', TopLevelFilter()),
            # boehm-gc is imported from upstream.
            ('config', ConfigFilter()),
            # contrib isn't really part of GCC.
            ('fixincludes', None),
            ('gcc', GCCFilter()),
            (os.path.join('gcc', 'testsuite'), TestsuiteFilter()),
            ('gnattools', None),
            ('include', None),
            ('libada', None),
            ('libatomic', None),
            ('libbacktrace', None),
            ('libcpp', LibCppFilter()),
            ('libdecnumber', None),
            # libffi is imported from upstream.
            ('libgcc', LibGCCFilter()),
            ('libgfortran', None),
            ('libgomp', None),
            ('libiberty', None),
            ('libitm', None),
            ('libjava', LibJavaFilter()),
            (os.path.join('libjava', 'testsuite'), TestsuiteFilter()),
            ('libmudflap', LibMudflapFilter()),
            (os.path.join('libmudflap', 'testsuite'), TestsuiteFilter()),
            ('libobjc', None),
            ('libquadmath', None),
            # libsanitiser is imported from upstream.
            ('libssp', None),
            ('libstdc++-v3', LibStdCxxFilter()),
            ('lto-plugin', None),
            # zlib is imported from upstream.
        ]
        for name, dir_filter in known_dirs:
            if dir_filter is None:
                self.add_dir(name)
            else:
                self.add_dir(name, dir_filter)

        # Directories updated when none are named on the command line.
        self.default_dirs = [
            'gcc',
            'libada',
            'libatomic',
            'libbacktrace',
            'libcpp',
            'libgcc',
            'libgfortran',
            'libgomp',
            'libitm',
            'libmudflap',
            'libobjc',
            'libquadmath',
            'libssp',
            'libstdc++-v3',
        ]
765
# Script entry point: build the GCC-specific command line and run it.
# main() always ends by calling sys.exit().
gcc_cmd_line = GCCCmdLine()
gcc_cmd_line.main()
This page took 0.090455 seconds and 5 git commands to generate.