Projet_SETI_RISC-V/riscv-gnu-toolchain/gcc/contrib/check-internal-format-escaping.py

#!/usr/bin/env python3
#
# Check gcc.pot file for stylistic issues as described in
# https://gcc.gnu.org/onlinedocs/gccint/Guidelines-for-Diagnostics.html,
# especially in gcc-internal-format messages.
#
# This file is part of GCC.
#
# GCC is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 3, or (at your option) any later
# version.
#
# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING3.  If not see
# <http://www.gnu.org/licenses/>.

import argparse
import re
from collections import Counter
from typing import Dict, Match

import polib

# Tally of emitted warnings, keyed by the diagnostic text.  warn()
# increments it and main() prints the entries seen more than once.
seen_warnings = Counter()


def location(msg: polib.POEntry):
    """
    Returns the first occurrence recorded for the message, formatted
    as 'file:line', or '<unknown location>' if there is none.
    """

    occurrences = msg.occurrences
    if not occurrences:
        return '<unknown location>'

    first = occurrences[0]
    return f'{first[0]}:{first[1]}'


def warn(msg: polib.POEntry,
         diagnostic_id: str, diagnostic: str, include_msgid=True):
    """
    Prints one diagnostic for the given message, prefixed with the
    location of the message, and counts it in seen_warnings.

    If include_msgid is true, the repr of the msgid is appended to
    the printed line.

    To suppress a warning for a particular message,
    add a line "#, gcclint:ignore:{diagnostic_id}" to the message.
    """

    suppression_flag = f'gcclint:ignore:{diagnostic_id}'
    if suppression_flag in msg.flags:
        return

    # The summary is keyed by the diagnostic text, not by its id.
    seen_warnings[diagnostic] += 1

    where = location(msg)
    line = (f'{where}: {diagnostic} in {repr(msg.msgid)}'
            if include_msgid
            else f'{where}: {diagnostic}')
    print(line)


def lint_gcc_internal_format(msg: polib.POEntry):
    """
    Checks a single message that has the gcc-internal-format. These
    messages use a variety of placeholders like %qs, %<quotes%> and
    %q#E.

    Every finding is reported through warn(); nothing is returned.
    """

    msgid: str = msg.msgid

    def outside_quotes(m: Match[str]):
        """
        Tells whether the match starts outside of %<quotes%>, judged
        by the text before the match containing as many %< as %>.
        """

        before = msgid[:m.start(0)]
        return before.count('%<') == before.count('%>')

    def lint_matching_placeholders():
        """
        Warns when literal values in placeholders are not exactly equal
        in the translation. This can happen when doing copy-and-paste
        translations of similar messages.

        To avoid these mismatches in the first place,
        structurally equal messages are found by
        lint_diagnostics_differing_only_in_placeholders.

        This check only applies when checking a finished translation
        such as de.po, not gcc.pot.
        """

        if not msg.translated():
            return

        in_msgid = re.findall('%<[^%]+%>', msgid)
        in_msgstr = re.findall('%<[^%]+%>', msg.msgstr)

        # Compared as sets: order and repetition are not significant.
        if set(in_msgid) != set(in_msgstr):
            warn(msg,
                 'placeholder-mismatch',
                 f'placeholder mismatch: msgid has {in_msgid}, '
                 f'msgstr has {in_msgstr}',
                 include_msgid=False)

    def lint_option_outside_quotes():
        """
        Warns about whitespace-separated words outside of %<quotes%>
        that look like a command line option (a '-' followed by a
        letter, except for the literal -INF) or that start with
        __builtin_.
        """

        for match in re.finditer(r'\S+', msgid):
            part = match.group()
            if not outside_quotes(match):
                continue

            if part.startswith('-'):
                if len(part) >= 2 and part[1].isalpha():
                    if part == '-INF':
                        continue

                    warn(msg,
                         'option-outside-quotes',
                         'command line option outside %<quotes%>')

            if part.startswith('__builtin_'):
                warn(msg,
                     'builtin-outside-quotes',
                     'builtin function outside %<quotes%>')

    def lint_plain_apostrophe():
        """
        Warns about each apostrophe outside of %<quotes%> that is not
        directly preceded by a %.
        """

        # The lookbehind keeps the match on the apostrophe itself, so
        # that outside_quotes sees a complete %< or %> right before
        # it.  It also finds an apostrophe at the very start of the
        # message or directly after another apostrophe.
        for match in re.finditer(r"(?<!%)'", msgid):
            if outside_quotes(match):
                warn(msg, 'apostrophe', 'apostrophe without leading %')

    def lint_space_before_quote():
        """
        A missing space before %< is often the result of string
        literals that are joined by the C compiler and neither literal
        has a space to separate the words.
        """

        for match in re.finditer('(.?[a-zA-Z0-9])%<', msgid):
            # A %s placeholder directly before %< is not reported.
            if match.group(1) != '%s':
                warn(msg,
                     'no-space-before-quote',
                     '%< directly following a letter or digit')

    def lint_underscore_outside_quotes():
        """
        An underscore outside of quotes is used in several contexts,
        and many of them violate the GCC Guidelines for Diagnostics:

        * names of GCC-internal compiler functions
        * names of GCC-internal data structures
        * static_cast and the like (which are legitimate)
        """

        for match in re.finditer('_', msgid):
            if outside_quotes(match):
                warn(msg,
                     'underscore-outside-quotes',
                     'underscore outside of %<quotes%>')
                # One warning per message is enough.
                return

    def lint_may_not():
        """
        The term "may not" may either mean "it could be the case"
        or "should not". These two different meanings are sometimes
        hard to tell apart.
        """

        if re.search(r'\bmay not\b', msgid):
            warn(msg,
                 'ambiguous-may-not',
                 'the term "may not" is ambiguous')

    def lint_unbalanced_quotes():
        """
        Warns when the number of %< differs from the number of %>,
        in the msgid and, for translated messages, in the msgstr.
        """

        if msgid.count('%<') != msgid.count('%>'):
            warn(msg,
                 'unbalanced-quotes',
                 'unbalanced %< and %> quotes')

        if msg.translated():
            if msg.msgstr.count('%<') != msg.msgstr.count('%>'):
                warn(msg,
                     'unbalanced-quotes',
                     'unbalanced %< and %> quotes')

    def lint_single_space_after_sentence():
        """
        After a sentence there should be two spaces.
        """

        if re.search(r'[.] [A-Z]', msgid):
            warn(msg,
                 'single-space-after-sentence',
                 'single space after sentence')

    def lint_non_canonical_quotes():
        """
        Catches %<%s%>, which can be written in the shorter form %qs.
        The same applies to %s wrapped in '...', "..." or `...'.
        """
        match = re.search("%<%s%>|'%s'|\"%s\"|`%s'", msgid)
        if match:
            warn(msg,
                 'non-canonical-quotes',
                 f'placeholder {match.group()} should be written as %qs')

    lint_option_outside_quotes()
    lint_plain_apostrophe()
    lint_space_before_quote()
    lint_underscore_outside_quotes()
    lint_may_not()
    lint_unbalanced_quotes()
    lint_matching_placeholders()
    lint_single_space_after_sentence()
    lint_non_canonical_quotes()


def lint_diagnostics_differing_only_in_placeholders(po: polib.POFile):
    """
    Finds messages that share the same structure and differ only in
    the plain strings they put inside %<quotes%>. Merging such
    messages prevents copy-and-paste mistakes by the translators.

    Each message is reduced to a pattern by replacing every quoted
    plain string with %qs; a message whose pattern has been recorded
    before is reported together with the earlier message.

    See bug 90119.
    """

    first_by_pattern: Dict[str, polib.POEntry] = {}

    for entry in po:
        entry: polib.POEntry
        text = entry.msgid
        pattern = re.sub('%<[^%]+%>', '%qs', text)

        if pattern in first_by_pattern:
            earlier = first_by_pattern[pattern]
            warn(entry,
                 'same-pattern',
                 f'same pattern for {repr(text)} and '
                 f'{repr(earlier.msgid)} in {location(earlier)}',
                 include_msgid=False)
        else:
            # Record the message under its pattern and under its
            # unmodified msgid.
            first_by_pattern[pattern] = entry
            first_by_pattern[text] = entry


def lint_file(po: polib.POFile):
    """
    Runs all checks on the given file: the per-message checks on each
    message that is neither obsolete nor fuzzy and carries the
    gcc-internal-format flag, followed by the check that compares the
    messages of the whole file with each other.
    """

    for entry in po:
        entry: polib.POEntry

        if entry.obsolete or entry.fuzzy:
            continue

        if 'gcc-internal-format' in entry.flags:
            lint_gcc_internal_format(entry)

    lint_diagnostics_differing_only_in_placeholders(po)


def main():
    """
    Command line entry point: lints the file named by the single
    positional argument, then prints a summary with the count of
    every diagnostic text that was reported more than once.
    """

    parser = argparse.ArgumentParser(description='')
    parser.add_argument('file', help='pot file')
    args = parser.parse_args()

    lint_file(polib.pofile(args.file))

    print()
    print('summary:')
    for diagnostic, count in seen_warnings.most_common():
        if count > 1:
            print(f'{count}\t{diagnostic}')


# Only run the linter when this file is executed as a script.
if __name__ == '__main__':
    main()
projet 2023-03-06 14:48:14 +01:00			`#!/usr/bin/env python3`
			`#`
			`# Check gcc.pot file for stylistic issues as described in`
			`# https://gcc.gnu.org/onlinedocs/gccint/Guidelines-for-Diagnostics.html,`
			`# especially in gcc-internal-format messages.`
			`#`
			`# This file is part of GCC.`
			`#`
			`# GCC is free software; you can redistribute it and/or modify it under`
			`# the terms of the GNU General Public License as published by the Free`
			`# Software Foundation; either version 3, or (at your option) any later`
			`# version.`
			`#`
			`# GCC is distributed in the hope that it will be useful, but WITHOUT ANY`
			`# WARRANTY; without even the implied warranty of MERCHANTABILITY or`
			`# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License`
			`# for more details.`
			`#`
			`# You should have received a copy of the GNU General Public License`
			`# along with GCC; see the file COPYING3. If not see`
			`# <http://www.gnu.org/licenses/>.`

			`import argparse`
			`import re`
			`from collections import Counter`
			`from typing import Dict, Match`

			`import polib`

			`seen_warnings = Counter()`


			`def location(msg: polib.POEntry):`
			`if msg.occurrences:`
			`occ = msg.occurrences[0]`
			`return f'{occ[0]}:{occ[1]}'`
			`return '<unknown location>'`


			`def warn(msg: polib.POEntry,`
			`diagnostic_id: str, diagnostic: str, include_msgid=True):`
			`"""`
			`To suppress a warning for a particular message,`
			`add a line "#, gcclint:ignore:{diagnostic_id}" to the message.`
			`"""`

			`if f'gcclint:ignore:{diagnostic_id}' in msg.flags:`
			`return`

			`seen_warnings[diagnostic] += 1`

			`if include_msgid:`
			`print(f'{location(msg)}: {diagnostic} in {repr(msg.msgid)}')`
			`else:`
			`print(f'{location(msg)}: {diagnostic}')`


			`def lint_gcc_internal_format(msg: polib.POEntry):`
			`"""`
			`Checks a single message that has the gcc-internal-format. These`
			`messages use a variety of placeholders like %qs, %<quotes%> and`
			`%q#E.`
			`"""`

			`msgid: str = msg.msgid`

			`def outside_quotes(m: Match[str]):`
			`before = msgid[:m.start(0)]`
			`return before.count('%<') == before.count('%>')`

			`def lint_matching_placeholders():`
			`"""`
			`Warns when literal values in placeholders are not exactly equal`
			`in the translation. This can happen when doing copy-and-paste`
			`translations of similar messages.`

			`To avoid these mismatches in the first place,`
			`structurally equal messages are found by`
			`lint_diagnostics_differing_only_in_placeholders.`

			`This check only applies when checking a finished translation`
			`such as de.po, not gcc.pot.`
			`"""`

			`if not msg.translated():`
			`return`

			`in_msgid = re.findall('%<[^%]+%>', msgid)`
			`in_msgstr = re.findall('%<[^%]+%>', msg.msgstr)`

			`if set(in_msgid) != set(in_msgstr):`
			`warn(msg,`
			`'placeholder-mismatch',`
			`f'placeholder mismatch: msgid has {in_msgid}, '`
			`f'msgstr has {in_msgstr}',`
			`include_msgid=False)`

			`def lint_option_outside_quotes():`
			`for match in re.finditer(r'\S+', msgid):`
			`part = match.group()`
			`if not outside_quotes(match):`
			`continue`

			`if part.startswith('-'):`
			`if len(part) >= 2 and part[1].isalpha():`
			`if part == '-INF':`
			`continue`

			`warn(msg,`
			`'option-outside-quotes',`
			`'command line option outside %<quotes%>')`

			`if part.startswith('__builtin_'):`
			`warn(msg,`
			`'builtin-outside-quotes',`
			`'builtin function outside %<quotes%>')`

			`def lint_plain_apostrophe():`
			`for match in re.finditer("[^%]'", msgid):`
			`if outside_quotes(match):`
			`warn(msg, 'apostrophe', 'apostrophe without leading %')`

			`def lint_space_before_quote():`
			`"""`
			`A space before %< is often the result of string literals that`
			`are joined by the C compiler and neither literal has a space`
			`to separate the words.`
			`"""`

			`for match in re.finditer('(.?[a-zA-Z0-9])%<', msgid):`
			`if match.group(1) != '%s':`
			`warn(msg,`
			`'no-space-before-quote',`
			`'%< directly following a letter or digit')`

			`def lint_underscore_outside_quotes():`
			`"""`
			`An underscore outside of quotes is used in several contexts,`
			`and many of them violate the GCC Guidelines for Diagnostics:`

			`* names of GCC-internal compiler functions`
			`* names of GCC-internal data structures`
			`* static_cast and the like (which are legitimate)`
			`"""`

			`for match in re.finditer('_', msgid):`
			`if outside_quotes(match):`
			`warn(msg,`
			`'underscore-outside-quotes',`
			`'underscore outside of %<quotes%>')`
			`return`

			`def lint_may_not():`
			`"""`
			`The term "may not" may either mean "it could be the case"`
			`or "should not". These two different meanings are sometimes`
			`hard to tell apart.`
			`"""`

			`if re.search(r'\bmay not\b', msgid):`
			`warn(msg,`
			`'ambiguous-may-not',`
			`'the term "may not" is ambiguous')`

			`def lint_unbalanced_quotes():`
			`if msgid.count('%<') != msgid.count('%>'):`
			`warn(msg,`
			`'unbalanced-quotes',`
			`'unbalanced %< and %> quotes')`

			`if msg.translated():`
			`if msg.msgstr.count('%<') != msg.msgstr.count('%>'):`
			`warn(msg,`
			`'unbalanced-quotes',`
			`'unbalanced %< and %> quotes')`

			`def lint_single_space_after_sentence():`
			`"""`
			`After a sentence there should be two spaces.`
			`"""`

			`if re.search(r'[.] [A-Z]', msgid):`
			`warn(msg,`
			`'single-space-after-sentence',`
			`'single space after sentence')`

			`def lint_non_canonical_quotes():`
			`"""`
			`Catches %<%s%>, which can be written in the shorter form %qs.`
			`"""`
			match = re.search("%<%s%>\|'%s'\|\"%s\"\|`%s'", msgid)
			`if match:`
			`warn(msg,`
			`'non-canonical-quotes',`
			`f'placeholder {match.group()} should be written as %qs')`

			`lint_option_outside_quotes()`
			`lint_plain_apostrophe()`
			`lint_space_before_quote()`
			`lint_underscore_outside_quotes()`
			`lint_may_not()`
			`lint_unbalanced_quotes()`
			`lint_matching_placeholders()`
			`lint_single_space_after_sentence()`
			`lint_non_canonical_quotes()`


			`def lint_diagnostics_differing_only_in_placeholders(po: polib.POFile):`
			`"""`
			`Detects messages that are structurally the same, except that they`
			`use different plain strings inside %<quotes%>. These messages can`
			`be merged in order to prevent copy-and-paste mistakes by the`
			`translators.`

			`See bug 90119.`
			`"""`

			`seen: Dict[str, polib.POEntry] = {}`

			`for msg in po:`
			`msg: polib.POEntry`
			`msgid = msg.msgid`

			`normalized = re.sub('%<[^%]+%>', '%qs', msgid)`
			`if normalized not in seen:`
			`seen[normalized] = msg`
			`seen[msgid] = msg`
			`continue`

			`prev = seen[normalized]`
			`warn(msg,`
			`'same-pattern',`
			`f'same pattern for {repr(msgid)} and '`
			`f'{repr(prev.msgid)} in {location(prev)}',`
			`include_msgid=False)`


			`def lint_file(po: polib.POFile):`
			`for msg in po:`
			`msg: polib.POEntry`

			`if not msg.obsolete and not msg.fuzzy:`
			`if 'gcc-internal-format' in msg.flags:`
			`lint_gcc_internal_format(msg)`

			`lint_diagnostics_differing_only_in_placeholders(po)`


			`def main():`
			`parser = argparse.ArgumentParser(description='')`
			`parser.add_argument('file', help='pot file')`

			`args = parser.parse_args()`

			`po = polib.pofile(args.file)`
			`lint_file(po)`

			`print()`
			`print('summary:')`
			`for entry in seen_warnings.most_common():`
			`if entry[1] > 1:`
			`print(f'{entry[1]}\t{entry[0]}')`


			`if __name__ == '__main__':`
			`main()`