1#!/usr/bin/env python3
2"""This script compares the interfaces of two versions of Mbed TLS, looking
3for backward incompatibilities between two different Git revisions within
4an Mbed TLS repository. It must be run from the root of a Git working tree.
5
6### How the script works ###
7
8For the source (API) and runtime (ABI) interface compatibility, this script
9is a small wrapper around the abi-compliance-checker and abi-dumper tools,
10applying them to compare the header and library files.
11
12For the storage format, this script compares the automatically generated
13storage tests and the manual read tests, and complains if there is a
14reduction in coverage. A change in test data will be signaled as a
15coverage reduction since the old test data is no longer present. A change in
16how test data is presented will be signaled as well; this would be a false
17positive.
18
19The results of the API/ABI comparison are either formatted as HTML and stored
20at a configurable location, or are given as a brief list of problems.
21Returns 0 on success, 1 on non-compliance, and 2 if there is an error
22while running the script.
23
24### How to interpret non-compliance ###
25
26This script has relatively common false positives. In many scenarios, it only
27reports a pass if there is a strict textual match between the old version and
28the new version, and it reports problems where there is a sufficient semantic
29match but not a textual match. This section lists some common false positives.
30This is not an exhaustive list: in the end what matters is whether we are
31breaking a backward compatibility goal.
32
33**API**: the goal is that if an application works with the old version of the
34library, it can be recompiled against the new version and will still work.
35This is normally validated by comparing the declarations in `include/*/*.h`.
36A failure is a declaration that has disappeared or that now has a different
37type.
38
39  * It's ok to change or remove macros and functions that are documented as
40    for internal use only or as experimental.
41  * It's ok to rename function or macro parameters as long as the semantics
42    has not changed.
43  * It's ok to change or remove structure fields that are documented as
44    private.
45  * It's ok to add fields to a structure that already had private fields
46    or was documented as extensible.
47
48**ABI**: the goal is that if an application was built against the old version
49of the library, the same binary will work when linked against the new version.
50This is normally validated by comparing the symbols exported by `libmbed*.so`.
51A failure is a symbol that is no longer exported by the same library or that
52now has a different type.
53
54  * All ABI changes are acceptable if the library version is bumped
55    (see `scripts/bump_version.sh`).
56  * ABI changes that concern functions which are declared only inside the
57    library directory, and not in `include/*/*.h`, are acceptable only if
58    the function was only ever used inside the same library (libmbedcrypto,
59    libmbedx509, libmbedtls). As a counter example, if the old version
60    of libmbedtls calls mbedtls_foo() from libmbedcrypto, and the new version
61    of libmbedcrypto no longer has a compatible mbedtls_foo(), this does
62    require a version bump for libmbedcrypto.
63
64**Storage format**: the goal is to check that persistent keys stored by the
65old version can be read by the new version. This is normally validated by
66comparing the `*read*` test cases in `test_suite*storage_format*.data`.
67A failure is a storage read test case that is no longer present with the same
68function name and parameter list.
69
70  * It's ok if the same test data is present, but its presentation has changed,
71    for example if a test function is renamed or has different parameters.
72  * It's ok if redundant tests are removed.
73
74**Generated test coverage**: the goal is to check that automatically
75generated tests have as much coverage as before. This is normally validated
76by comparing the test cases that are automatically generated by a script.
77A failure is a generated test case that is no longer present with the same
78function name and parameter list.
79
80  * It's ok if the same test data is present, but its presentation has changed,
81    for example if a test function is renamed or has different parameters.
82  * It's ok if redundant tests are removed.
83
84"""
85
86# Copyright The Mbed TLS Contributors
87# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
88
89import glob
90import os
91import re
92import sys
93import traceback
94import shutil
95import subprocess
96import argparse
97import logging
98import tempfile
99import fnmatch
100from types import SimpleNamespace
101
102import xml.etree.ElementTree as ET
103
104import framework_scripts_path # pylint: disable=unused-import
105from mbedtls_framework import build_tree
106
107
108class AbiChecker:
109    """API and ABI checker."""
110
111    def __init__(self, old_version, new_version, configuration):
112        """Instantiate the API/ABI checker.
113
114        old_version: RepoVersion containing details to compare against
115        new_version: RepoVersion containing details to check
116        configuration.report_dir: directory for output files
117        configuration.keep_all_reports: if false, delete old reports
118        configuration.brief: if true, output shorter report to stdout
119        configuration.check_abi: if true, compare ABIs
120        configuration.check_api: if true, compare APIs
121        configuration.check_storage: if true, compare storage format tests
122        configuration.skip_file: path to file containing symbols and types to skip
123        """
124        self.repo_path = "."
125        self.log = None
126        self.verbose = configuration.verbose
127        self._setup_logger()
128        self.report_dir = os.path.abspath(configuration.report_dir)
129        self.keep_all_reports = configuration.keep_all_reports
130        self.can_remove_report_dir = not (os.path.exists(self.report_dir) or
131                                          self.keep_all_reports)
132        self.old_version = old_version
133        self.new_version = new_version
134        self.skip_file = configuration.skip_file
135        self.check_abi = configuration.check_abi
136        self.check_api = configuration.check_api
137        if self.check_abi != self.check_api:
138            raise Exception('Checking API without ABI or vice versa is not supported')
139        self.check_storage_tests = configuration.check_storage
140        self.brief = configuration.brief
141        self.git_command = "git"
142        self.make_command = "make"
143
144    def _setup_logger(self):
145        self.log = logging.getLogger()
146        if self.verbose:
147            self.log.setLevel(logging.DEBUG)
148        else:
149            self.log.setLevel(logging.INFO)
150        self.log.addHandler(logging.StreamHandler())
151
152    @staticmethod
153    def check_abi_tools_are_installed():
154        for command in ["abi-dumper", "abi-compliance-checker"]:
155            if not shutil.which(command):
156                raise Exception("{} not installed, aborting".format(command))
157
158    def _get_clean_worktree_for_git_revision(self, version):
159        """Make a separate worktree with version.revision checked out.
160        Do not modify the current worktree."""
161        git_worktree_path = tempfile.mkdtemp()
162        if version.repository:
163            self.log.debug(
164                "Checking out git worktree for revision {} from {}".format(
165                    version.revision, version.repository
166                )
167            )
168            fetch_output = subprocess.check_output(
169                [self.git_command, "fetch",
170                 version.repository, version.revision],
171                cwd=self.repo_path,
172                stderr=subprocess.STDOUT
173            )
174            self.log.debug(fetch_output.decode("utf-8"))
175            worktree_rev = "FETCH_HEAD"
176        else:
177            self.log.debug("Checking out git worktree for revision {}".format(
178                version.revision
179            ))
180            worktree_rev = version.revision
181        worktree_output = subprocess.check_output(
182            [self.git_command, "worktree", "add", "--detach",
183             git_worktree_path, worktree_rev],
184            cwd=self.repo_path,
185            stderr=subprocess.STDOUT
186        )
187        self.log.debug(worktree_output.decode("utf-8"))
188        version.commit = subprocess.check_output(
189            [self.git_command, "rev-parse", "HEAD"],
190            cwd=git_worktree_path,
191            stderr=subprocess.STDOUT
192        ).decode("ascii").rstrip()
193        self.log.debug("Commit is {}".format(version.commit))
194        return git_worktree_path
195
196    def _update_git_submodules(self, git_worktree_path, version):
197        """If the crypto submodule is present, initialize it.
198        if version.crypto_revision exists, update it to that revision,
199        otherwise update it to the default revision"""
200        update_output = subprocess.check_output(
201            [self.git_command, "submodule", "update", "--init", '--recursive'],
202            cwd=git_worktree_path,
203            stderr=subprocess.STDOUT
204        )
205        self.log.debug(update_output.decode("utf-8"))
206        if not (os.path.exists(os.path.join(git_worktree_path, "crypto"))
207                and version.crypto_revision):
208            return
209
210        if version.crypto_repository:
211            fetch_output = subprocess.check_output(
212                [self.git_command, "fetch", version.crypto_repository,
213                 version.crypto_revision],
214                cwd=os.path.join(git_worktree_path, "crypto"),
215                stderr=subprocess.STDOUT
216            )
217            self.log.debug(fetch_output.decode("utf-8"))
218            crypto_rev = "FETCH_HEAD"
219        else:
220            crypto_rev = version.crypto_revision
221
222        checkout_output = subprocess.check_output(
223            [self.git_command, "checkout", crypto_rev],
224            cwd=os.path.join(git_worktree_path, "crypto"),
225            stderr=subprocess.STDOUT
226        )
227        self.log.debug(checkout_output.decode("utf-8"))
228
229    def _build_shared_libraries(self, git_worktree_path, version):
230        """Build the shared libraries in the specified worktree."""
231        my_environment = os.environ.copy()
232        my_environment["CFLAGS"] = "-g -Og"
233        my_environment["SHARED"] = "1"
234        if os.path.exists(os.path.join(git_worktree_path, "crypto")):
235            my_environment["USE_CRYPTO_SUBMODULE"] = "1"
236        make_output = subprocess.check_output(
237            [self.make_command, "lib"],
238            env=my_environment,
239            cwd=git_worktree_path,
240            stderr=subprocess.STDOUT
241        )
242        self.log.debug(make_output.decode("utf-8"))
243        for root, _dirs, files in os.walk(git_worktree_path):
244            for file in fnmatch.filter(files, "*.so"):
245                version.modules[os.path.splitext(file)[0]] = (
246                    os.path.join(root, file)
247                )
248
249    @staticmethod
250    def _pretty_revision(version):
251        if version.revision == version.commit:
252            return version.revision
253        else:
254            return "{} ({})".format(version.revision, version.commit)
255
256    def _get_abi_dumps_from_shared_libraries(self, version):
257        """Generate the ABI dumps for the specified git revision.
258        The shared libraries must have been built and the module paths
259        present in version.modules."""
260        for mbed_module, module_path in version.modules.items():
261            output_path = os.path.join(
262                self.report_dir, "{}-{}-{}.dump".format(
263                    mbed_module, version.revision, version.version
264                )
265            )
266            abi_dump_command = [
267                "abi-dumper",
268                module_path,
269                "-o", output_path,
270                "-lver", self._pretty_revision(version),
271            ]
272            abi_dump_output = subprocess.check_output(
273                abi_dump_command,
274                stderr=subprocess.STDOUT
275            )
276            self.log.debug(abi_dump_output.decode("utf-8"))
277            version.abi_dumps[mbed_module] = output_path
278
279    @staticmethod
280    def _normalize_storage_test_case_data(line):
281        """Eliminate cosmetic or irrelevant details in storage format test cases."""
282        line = re.sub(r'\s+', r'', line)
283        return line
284
285    def _read_storage_tests(self,
286                            directory,
287                            filename,
288                            is_generated,
289                            storage_tests):
290        """Record storage tests from the given file.
291
292        Populate the storage_tests dictionary with test cases read from
293        filename under directory.
294        """
295        at_paragraph_start = True
296        description = None
297        full_path = os.path.join(directory, filename)
298        with open(full_path) as fd:
299            for line_number, line in enumerate(fd, 1):
300                line = line.strip()
301                if not line:
302                    at_paragraph_start = True
303                    continue
304                if line.startswith('#'):
305                    continue
306                if at_paragraph_start:
307                    description = line.strip()
308                    at_paragraph_start = False
309                    continue
310                if line.startswith('depends_on:'):
311                    continue
312                # We've reached a test case data line
313                test_case_data = self._normalize_storage_test_case_data(line)
314                if not is_generated:
315                    # In manual test data, only look at read tests.
316                    function_name = test_case_data.split(':', 1)[0]
317                    if 'read' not in function_name.split('_'):
318                        continue
319                metadata = SimpleNamespace(
320                    filename=filename,
321                    line_number=line_number,
322                    description=description
323                )
324                storage_tests[test_case_data] = metadata
325
326    @staticmethod
327    def _list_generated_test_data_files(git_worktree_path):
328        """List the generated test data files."""
329        generate_psa_tests = 'framework/scripts/generate_psa_tests.py'
330        if not os.path.isfile(git_worktree_path + '/' + generate_psa_tests):
331            # The checked-out revision is from before generate_psa_tests.py
332            # was moved to the framework submodule. Use the old location.
333            generate_psa_tests = 'tests/scripts/generate_psa_tests.py'
334
335        output = subprocess.check_output(
336            [generate_psa_tests, '--list'],
337            cwd=git_worktree_path,
338        ).decode('ascii')
339        return [line for line in output.split('\n') if line]
340
341    def _get_storage_format_tests(self, version, git_worktree_path):
342        """Record the storage format tests for the specified git version.
343
344        The storage format tests are the test suite data files whose name
345        contains "storage_format".
346
347        The version must be checked out at git_worktree_path.
348
349        This function creates or updates the generated data files.
350        """
351        # Existing test data files. This may be missing some automatically
352        # generated files if they haven't been generated yet.
353        if os.path.isdir(os.path.join(git_worktree_path, 'tf-psa-crypto',
354                                      'tests', 'suites')):
355            storage_data_files = set(glob.glob(
356                'tf-psa-crypto/tests/suites/test_suite_*storage_format*.data'
357            ))
358        else:
359            storage_data_files = set(glob.glob(
360                'tests/suites/test_suite_*storage_format*.data'
361            ))
362        # Discover and (re)generate automatically generated data files.
363        to_be_generated = set()
364        for filename in self._list_generated_test_data_files(git_worktree_path):
365            if 'storage_format' in filename:
366                storage_data_files.add(filename)
367                to_be_generated.add(filename)
368
369        generate_psa_tests = 'framework/scripts/generate_psa_tests.py'
370        if not os.path.isfile(git_worktree_path + '/' + generate_psa_tests):
371            # The checked-out revision is from before generate_psa_tests.py
372            # was moved to the framework submodule. Use the old location.
373            generate_psa_tests = 'tests/scripts/generate_psa_tests.py'
374        subprocess.check_call(
375            [generate_psa_tests] + sorted(to_be_generated),
376            cwd=git_worktree_path,
377        )
378        for test_file in sorted(storage_data_files):
379            self._read_storage_tests(git_worktree_path,
380                                     test_file,
381                                     test_file in to_be_generated,
382                                     version.storage_tests)
383
384    def _cleanup_worktree(self, git_worktree_path):
385        """Remove the specified git worktree."""
386        shutil.rmtree(git_worktree_path)
387        worktree_output = subprocess.check_output(
388            [self.git_command, "worktree", "prune"],
389            cwd=self.repo_path,
390            stderr=subprocess.STDOUT
391        )
392        self.log.debug(worktree_output.decode("utf-8"))
393
394    def _get_abi_dump_for_ref(self, version):
395        """Generate the interface information for the specified git revision."""
396        git_worktree_path = self._get_clean_worktree_for_git_revision(version)
397        self._update_git_submodules(git_worktree_path, version)
398        if self.check_abi:
399            self._build_shared_libraries(git_worktree_path, version)
400            self._get_abi_dumps_from_shared_libraries(version)
401        if self.check_storage_tests:
402            self._get_storage_format_tests(version, git_worktree_path)
403        self._cleanup_worktree(git_worktree_path)
404
405    def _remove_children_with_tag(self, parent, tag):
406        children = parent.getchildren()
407        for child in children:
408            if child.tag == tag:
409                parent.remove(child)
410            else:
411                self._remove_children_with_tag(child, tag)
412
413    def _remove_extra_detail_from_report(self, report_root):
414        for tag in ['test_info', 'test_results', 'problem_summary',
415                    'added_symbols', 'affected']:
416            self._remove_children_with_tag(report_root, tag)
417
418        for report in report_root:
419            for problems in report.getchildren()[:]:
420                if not problems.getchildren():
421                    report.remove(problems)
422
423    def _abi_compliance_command(self, mbed_module, output_path):
424        """Build the command to run to analyze the library mbed_module.
425        The report will be placed in output_path."""
426        abi_compliance_command = [
427            "abi-compliance-checker",
428            "-l", mbed_module,
429            "-old", self.old_version.abi_dumps[mbed_module],
430            "-new", self.new_version.abi_dumps[mbed_module],
431            "-strict",
432            "-report-path", output_path,
433        ]
434        if self.skip_file:
435            abi_compliance_command += ["-skip-symbols", self.skip_file,
436                                       "-skip-types", self.skip_file]
437        if self.brief:
438            abi_compliance_command += ["-report-format", "xml",
439                                       "-stdout"]
440        return abi_compliance_command
441
442    def _is_library_compatible(self, mbed_module, compatibility_report):
443        """Test if the library mbed_module has remained compatible.
444        Append a message regarding compatibility to compatibility_report."""
445        output_path = os.path.join(
446            self.report_dir, "{}-{}-{}.html".format(
447                mbed_module, self.old_version.revision,
448                self.new_version.revision
449            )
450        )
451        try:
452            subprocess.check_output(
453                self._abi_compliance_command(mbed_module, output_path),
454                stderr=subprocess.STDOUT
455            )
456        except subprocess.CalledProcessError as err:
457            if err.returncode != 1:
458                raise err
459            if self.brief:
460                self.log.info(
461                    "Compatibility issues found for {}".format(mbed_module)
462                )
463                report_root = ET.fromstring(err.output.decode("utf-8"))
464                self._remove_extra_detail_from_report(report_root)
465                self.log.info(ET.tostring(report_root).decode("utf-8"))
466            else:
467                self.can_remove_report_dir = False
468                compatibility_report.append(
469                    "Compatibility issues found for {}, "
470                    "for details see {}".format(mbed_module, output_path)
471                )
472            return False
473        compatibility_report.append(
474            "No compatibility issues for {}".format(mbed_module)
475        )
476        if not (self.keep_all_reports or self.brief):
477            os.remove(output_path)
478        return True
479
480    @staticmethod
481    def _is_storage_format_compatible(old_tests, new_tests,
482                                      compatibility_report):
483        """Check whether all tests present in old_tests are also in new_tests.
484
485        Append a message regarding compatibility to compatibility_report.
486        """
487        missing = frozenset(old_tests.keys()).difference(new_tests.keys())
488        for test_data in sorted(missing):
489            metadata = old_tests[test_data]
490            compatibility_report.append(
491                'Test case from {} line {} "{}" has disappeared: {}'.format(
492                    metadata.filename, metadata.line_number,
493                    metadata.description, test_data
494                )
495            )
496        compatibility_report.append(
497            'FAIL: {}/{} storage format test cases have changed or disappeared.'.format(
498                len(missing), len(old_tests)
499            ) if missing else
500            'PASS: All {} storage format test cases are preserved.'.format(
501                len(old_tests)
502            )
503        )
504        compatibility_report.append(
505            'Info: number of storage format tests cases: {} -> {}.'.format(
506                len(old_tests), len(new_tests)
507            )
508        )
509        return not missing
510
511    def get_abi_compatibility_report(self):
512        """Generate a report of the differences between the reference ABI
513        and the new ABI. ABI dumps from self.old_version and self.new_version
514        must be available."""
515        compatibility_report = ["Checking evolution from {} to {}".format(
516            self._pretty_revision(self.old_version),
517            self._pretty_revision(self.new_version)
518        )]
519        compliance_return_code = 0
520
521        if self.check_abi:
522            shared_modules = list(set(self.old_version.modules.keys()) &
523                                  set(self.new_version.modules.keys()))
524            for mbed_module in shared_modules:
525                if not self._is_library_compatible(mbed_module,
526                                                   compatibility_report):
527                    compliance_return_code = 1
528
529        if self.check_storage_tests:
530            if not self._is_storage_format_compatible(
531                    self.old_version.storage_tests,
532                    self.new_version.storage_tests,
533                    compatibility_report):
534                compliance_return_code = 1
535
536        for version in [self.old_version, self.new_version]:
537            for mbed_module, mbed_module_dump in version.abi_dumps.items():
538                os.remove(mbed_module_dump)
539        if self.can_remove_report_dir:
540            os.rmdir(self.report_dir)
541        self.log.info("\n".join(compatibility_report))
542        return compliance_return_code
543
544    def check_for_abi_changes(self):
545        """Generate a report of ABI differences
546        between self.old_rev and self.new_rev."""
547        build_tree.check_repo_path()
548        if self.check_api or self.check_abi:
549            self.check_abi_tools_are_installed()
550        self._get_abi_dump_for_ref(self.old_version)
551        self._get_abi_dump_for_ref(self.new_version)
552        return self.get_abi_compatibility_report()
553
554
def run_main():
    """Command line entry point: parse the arguments and run the checks.

    Exit with status 0 if the new version is compatible with the old one,
    1 if compatibility issues were found, and 2 if an error prevented the
    checks from running (including invalid command line arguments).
    """
    try:
        parser = argparse.ArgumentParser(
            description=__doc__
        )
        parser.add_argument(
            "-v", "--verbose", action="store_true",
            help="set verbosity level",
        )
        parser.add_argument(
            "-r", "--report-dir", type=str, default="reports",
            help="directory where reports are stored, default is reports",
        )
        parser.add_argument(
            "-k", "--keep-all-reports", action="store_true",
            help="keep all reports, even if there are no compatibility issues",
        )
        parser.add_argument(
            "-o", "--old-rev", type=str, help="revision for old version.",
            required=True,
        )
        parser.add_argument(
            "-or", "--old-repo", type=str, help="repository for old version."
        )
        parser.add_argument(
            "-oc", "--old-crypto-rev", type=str,
            help="revision for old crypto submodule."
        )
        parser.add_argument(
            "-ocr", "--old-crypto-repo", type=str,
            help="repository for old crypto submodule."
        )
        parser.add_argument(
            "-n", "--new-rev", type=str, help="revision for new version",
            required=True,
        )
        parser.add_argument(
            "-nr", "--new-repo", type=str, help="repository for new version."
        )
        parser.add_argument(
            "-nc", "--new-crypto-rev", type=str,
            help="revision for new crypto version"
        )
        parser.add_argument(
            "-ncr", "--new-crypto-repo", type=str,
            help="repository for new crypto submodule."
        )
        parser.add_argument(
            "-s", "--skip-file", type=str,
            help=("path to file containing symbols and types to skip "
                  "(typically \"-s identifiers\" after running "
                  "\"tests/scripts/list-identifiers.sh --internal\")")
        )
        parser.add_argument(
            "--check-abi",
            action='store_true', default=True,
            help="Perform ABI comparison (default: yes)"
        )
        parser.add_argument(
            "--no-check-abi", action='store_false', dest='check_abi',
            help="Do not perform ABI comparison"
        )
        parser.add_argument(
            "--check-api",
            action='store_true', default=True,
            help="Perform API comparison (default: yes)"
        )
        parser.add_argument(
            "--no-check-api", action='store_false', dest='check_api',
            help="Do not perform API comparison"
        )
        parser.add_argument(
            "--check-storage",
            action='store_true', default=True,
            help="Perform storage tests comparison (default: yes)"
        )
        parser.add_argument(
            "--no-check-storage", action='store_false', dest='check_storage',
            help="Do not perform storage tests comparison"
        )
        parser.add_argument(
            "-b", "--brief", action="store_true",
            help="output only the list of issues to stdout, instead of a full report",
        )
        abi_args = parser.parse_args()
        if os.path.isfile(abi_args.report_dir):
            # This is an error, so exit with the documented error status 2
            # (a bare parser.exit() would report success with status 0).
            parser.exit(
                2,
                "Error: {} is not a directory\n".format(abi_args.report_dir)
            )
        old_version = SimpleNamespace(
            version="old",
            repository=abi_args.old_repo,
            revision=abi_args.old_rev,
            commit=None,
            crypto_repository=abi_args.old_crypto_repo,
            crypto_revision=abi_args.old_crypto_rev,
            abi_dumps={},
            storage_tests={},
            modules={}
        )
        new_version = SimpleNamespace(
            version="new",
            repository=abi_args.new_repo,
            revision=abi_args.new_rev,
            commit=None,
            crypto_repository=abi_args.new_crypto_repo,
            crypto_revision=abi_args.new_crypto_rev,
            abi_dumps={},
            storage_tests={},
            modules={}
        )
        configuration = SimpleNamespace(
            verbose=abi_args.verbose,
            report_dir=abi_args.report_dir,
            keep_all_reports=abi_args.keep_all_reports,
            brief=abi_args.brief,
            check_abi=abi_args.check_abi,
            check_api=abi_args.check_api,
            check_storage=abi_args.check_storage,
            skip_file=abi_args.skip_file
        )
        abi_check = AbiChecker(old_version, new_version, configuration)
        return_code = abi_check.check_for_abi_changes()
        sys.exit(return_code)
    except Exception: # pylint: disable=broad-except
        # Print the backtrace and exit explicitly so as to exit with
        # status 2, not 1.
        traceback.print_exc()
        sys.exit(2)


if __name__ == "__main__":
    run_main()
678