1# Copyright (c) 2020-2021 The Linux Foundation
2#
3# SPDX-License-Identifier: Apache-2.0
4
5import os
6import re
7from dataclasses import dataclass
8
9import yaml
10from west import log
11from west.util import WestNotFound, west_topdir
12
13import zspdx.spdxids
14from zspdx.cmakecache import parseCMakeCacheFile
15from zspdx.cmakefileapijson import parseReply
16from zspdx.datatypes import (
17    Document,
18    DocumentConfig,
19    File,
20    Package,
21    PackageConfig,
22    Relationship,
23    RelationshipData,
24    RelationshipDataElementType,
25)
26from zspdx.getincludes import getCIncludes
27
28
29# WalkerConfig contains configuration data for the Walker.
@dataclass(eq=True)
class WalkerConfig:
    # Settings consumed by Walker. Every field has a default, so a bare
    # WalkerConfig() is valid and fields can be filled in afterwards.

    # prefix for Document namespaces; should not end with "/"
    # (Walker appends suffixes such as "/app" and "/build" to it)
    namespacePrefix: str = ""

    # location of build directory; Walker looks for CMakeCache.txt and the
    # .cmake/api/v1/reply directory underneath it
    buildDir: str = ""

    # should also analyze for included header files?
    analyzeIncludes: bool = False

    # should also add an SPDX document for the SDK?
    includeSDK: bool = False
43
44# Walker is the main analysis class: it walks through the CMake codemodel,
45# build files, and corresponding source and SDK files, and gathers the
46# information needed to build the SPDX data classes.
47class Walker:
48    # initialize with WalkerConfig
49    def __init__(self, cfg):
50        super().__init__()
51
52        # configuration - WalkerConfig
53        self.cfg = cfg
54
55        # the various Documents that we will be building
56        self.docBuild = None
57        self.docZephyr = None
58        self.docApp = None
59        self.docSDK = None
60        self.docModulesExtRefs = None
61
62        # dict of absolute file path => the Document that owns that file
63        self.allFileLinks = {}
64
65        # queue of pending source Files to create, process and assign
66        self.pendingSources = []
67
68        # queue of pending relationships to create, process and assign
69        self.pendingRelationships = []
70
71        # parsed CMake codemodel
72        self.cm = None
73
74        # parsed CMake cache dict, once we have the build path
75        self.cmakeCache = {}
76
77        # C compiler path from parsed CMake cache
78        self.compilerPath = ""
79
80        # SDK install path from parsed CMake cache
81        self.sdkPath = ""
82
83    def _build_purl(self, url, version=None):
84        if not url:
85            return None
86
87        purl = None
88        # This is designed to match repository with the following url pattern:
89        # '<protocol><type>/<namespace>/<package>
90        COMMON_GIT_URL_REGEX=r'((git@|http(s)?:\/\/)(?P<type>[\w\.@]+)(\.\w+)(\/|:))(?P<namespace>[\w,\-,\_\/]+)\/(?P<package>[\w,\-,\_]+)(.git){0,1}((\/){0,1})$'
91
92        match = re.fullmatch(COMMON_GIT_URL_REGEX, url)
93        if match:
94            purl = f'pkg:{match.group("type")}/{match.group("namespace")}/{match.group("package")}'
95
96        if purl and (version or len(version) > 0):
97            purl += f'@{version}'
98
99        return purl
100
101    def _add_describe_relationship(self, doc, cfgpackage):
102        # create DESCRIBES relationship data
103        rd = RelationshipData()
104        rd.ownerType = RelationshipDataElementType.DOCUMENT
105        rd.ownerDocument = doc
106        rd.otherType = RelationshipDataElementType.PACKAGEID
107        rd.otherPackageID = cfgpackage.spdxID
108        rd.rlnType = "DESCRIBES"
109
110        # add it to pending relationships queue
111        self.pendingRelationships.append(rd)
112
113    # primary entry point
114    def makeDocuments(self):
115        # parse CMake cache file and get compiler path
116        log.inf("parsing CMake Cache file")
117        self.getCacheFile()
118
119        # check if meta file is generated
120        if not self.metaFile:
121            log.err("CONFIG_BUILD_OUTPUT_META must be enabled to generate spdx files; bailing")
122            return False
123
124        # parse codemodel from Walker cfg's build dir
125        log.inf("parsing CMake Codemodel files")
126        self.cm = self.getCodemodel()
127        if not self.cm:
128            log.err("could not parse codemodel from CMake API reply; bailing")
129            return False
130
131        # set up Documents
132        log.inf("setting up SPDX documents")
133        retval = self.setupDocuments()
134        if not retval:
135            return False
136
137        # walk through targets in codemodel to gather information
138        log.inf("walking through targets")
139        self.walkTargets()
140
141        # walk through pending sources and create corresponding files
142        log.inf("walking through pending sources files")
143        self.walkPendingSources()
144
145        # walk through pending relationship data and create relationships
146        log.inf("walking through pending relationships")
147        self.walkRelationships()
148
149        return True
150
151    # parse cache file and pull out relevant data
152    def getCacheFile(self):
153        cacheFilePath = os.path.join(self.cfg.buildDir, "CMakeCache.txt")
154        self.cmakeCache = parseCMakeCacheFile(cacheFilePath)
155        if self.cmakeCache:
156            self.compilerPath = self.cmakeCache.get("CMAKE_C_COMPILER", "")
157            self.sdkPath = self.cmakeCache.get("ZEPHYR_SDK_INSTALL_DIR", "")
158            self.metaFile =  self.cmakeCache.get("KERNEL_META_PATH", "")
159
160    # determine path from build dir to CMake file-based API index file, then
161    # parse it and return the Codemodel
162    def getCodemodel(self):
163        log.dbg("getting codemodel from CMake API reply files")
164
165        # make sure the reply directory exists
166        cmakeReplyDirPath = os.path.join(self.cfg.buildDir, ".cmake", "api", "v1", "reply")
167        if not os.path.exists(cmakeReplyDirPath):
168            log.err(f'cmake api reply directory {cmakeReplyDirPath} does not exist')
169            log.err('was query directory created before cmake build ran?')
170            return None
171        if not os.path.isdir(cmakeReplyDirPath):
172            log.err(f'cmake api reply directory {cmakeReplyDirPath} exists but is not a directory')
173            return None
174
175        # find file with "index" prefix; there should only be one
176        indexFilePath = ""
177        for f in os.listdir(cmakeReplyDirPath):
178            if f.startswith("index"):
179                indexFilePath = os.path.join(cmakeReplyDirPath, f)
180                break
181        if indexFilePath == "":
182            # didn't find it
183            log.err(f'cmake api reply index file not found in {cmakeReplyDirPath}')
184            return None
185
186        # parse it
187        return parseReply(indexFilePath)
188
189    def setupAppDocument(self):
190        # set up app document
191        cfgApp = DocumentConfig()
192        cfgApp.name = "app-sources"
193        cfgApp.namespace = self.cfg.namespacePrefix + "/app"
194        cfgApp.docRefID = "DocumentRef-app"
195        self.docApp = Document(cfgApp)
196
197        # also set up app sources package
198        cfgPackageApp = PackageConfig()
199        cfgPackageApp.name = "app-sources"
200        cfgPackageApp.spdxID = "SPDXRef-app-sources"
201        cfgPackageApp.primaryPurpose = "SOURCE"
202        # relativeBaseDir is app sources dir
203        cfgPackageApp.relativeBaseDir = self.cm.paths_source
204        pkgApp = Package(cfgPackageApp, self.docApp)
205        self.docApp.pkgs[pkgApp.cfg.spdxID] = pkgApp
206
207        self._add_describe_relationship(self.docApp, cfgPackageApp)
208
209    def setupBuildDocument(self):
210        # set up build document
211        cfgBuild = DocumentConfig()
212        cfgBuild.name = "build"
213        cfgBuild.namespace = self.cfg.namespacePrefix + "/build"
214        cfgBuild.docRefID = "DocumentRef-build"
215        self.docBuild = Document(cfgBuild)
216
217        # we'll create the build packages in walkTargets()
218
219        # the DESCRIBES relationship for the build document will be
220        # with the zephyr_final package
221        rd = RelationshipData()
222        rd.ownerType = RelationshipDataElementType.DOCUMENT
223        rd.ownerDocument = self.docBuild
224        rd.otherType = RelationshipDataElementType.TARGETNAME
225        rd.otherTargetName = "zephyr_final"
226        rd.rlnType = "DESCRIBES"
227
228        # add it to pending relationships queue
229        self.pendingRelationships.append(rd)
230
231    def setupZephyrDocument(self, zephyr, modules):
232        # set up zephyr document
233        cfgZephyr = DocumentConfig()
234        cfgZephyr.name = "zephyr-sources"
235        cfgZephyr.namespace = self.cfg.namespacePrefix + "/zephyr"
236        cfgZephyr.docRefID = "DocumentRef-zephyr"
237        self.docZephyr = Document(cfgZephyr)
238
239        # relativeBaseDir is Zephyr sources topdir
240        try:
241            relativeBaseDir = west_topdir(self.cm.paths_source)
242        except WestNotFound:
243            log.err("cannot find west_topdir for CMake Codemodel sources path "
244                    f"{self.cm.paths_source}; bailing")
245            return False
246
247        # set up zephyr sources package
248        cfgPackageZephyr = PackageConfig()
249        cfgPackageZephyr.name = "zephyr-sources"
250        cfgPackageZephyr.spdxID = "SPDXRef-zephyr-sources"
251        cfgPackageZephyr.relativeBaseDir = relativeBaseDir
252
253        zephyr_url = zephyr.get("remote", "")
254        if zephyr_url:
255            cfgPackageZephyr.url = zephyr_url
256
257        if zephyr.get("revision"):
258            cfgPackageZephyr.revision = zephyr.get("revision")
259
260        purl = None
261        zephyr_tags = zephyr.get("tags", "")
262        if zephyr_tags:
263            # Find tag vX.Y.Z
264            for tag in zephyr_tags:
265                version = re.fullmatch(r'^v(?P<version>\d+\.\d+\.\d+)$', tag)
266                purl = self._build_purl(zephyr_url, tag)
267
268                if purl:
269                    cfgPackageZephyr.externalReferences.append(purl)
270
271                # Extract version from tag once
272                if cfgPackageZephyr.version == "" and version:
273                    cfgPackageZephyr.version = version.group('version')
274
275        if len(cfgPackageZephyr.version) > 0:
276            cpe = f'cpe:2.3:o:zephyrproject:zephyr:{cfgPackageZephyr.version}:-:*:*:*:*:*:*'
277            cfgPackageZephyr.externalReferences.append(cpe)
278
279        pkgZephyr = Package(cfgPackageZephyr, self.docZephyr)
280        self.docZephyr.pkgs[pkgZephyr.cfg.spdxID] = pkgZephyr
281
282        self._add_describe_relationship(self.docZephyr, cfgPackageZephyr)
283
284        for module in modules:
285            module_name = module.get("name", None)
286            module_path = module.get("path", None)
287            module_url = module.get("remote", None)
288            module_revision = module.get("revision", None)
289
290            if not module_name:
291                log.err("cannot find module name in meta file; bailing")
292                return False
293
294            # set up zephyr sources package
295            cfgPackageZephyrModule = PackageConfig()
296            cfgPackageZephyrModule.name = module_name + "-sources"
297            cfgPackageZephyrModule.spdxID = "SPDXRef-" + module_name + "-sources"
298            cfgPackageZephyrModule.relativeBaseDir = module_path
299            cfgPackageZephyrModule.primaryPurpose = "SOURCE"
300
301            if module_revision:
302                cfgPackageZephyrModule.revision = module_revision
303
304            if module_url:
305                cfgPackageZephyrModule.url = module_url
306
307            pkgZephyrModule = Package(cfgPackageZephyrModule, self.docZephyr)
308            self.docZephyr.pkgs[pkgZephyrModule.cfg.spdxID] = pkgZephyrModule
309
310            self._add_describe_relationship(self.docZephyr, cfgPackageZephyrModule)
311
312        return True
313
314    def setupSDKDocument(self):
315        # set up SDK document
316        cfgSDK = DocumentConfig()
317        cfgSDK.name = "sdk"
318        cfgSDK.namespace = self.cfg.namespacePrefix + "/sdk"
319        cfgSDK.docRefID = "DocumentRef-sdk"
320        self.docSDK = Document(cfgSDK)
321
322        # also set up zephyr sdk package
323        cfgPackageSDK = PackageConfig()
324        cfgPackageSDK.name = "sdk"
325        cfgPackageSDK.spdxID = "SPDXRef-sdk"
326        # relativeBaseDir is SDK dir
327        cfgPackageSDK.relativeBaseDir = self.sdkPath
328        pkgSDK = Package(cfgPackageSDK, self.docSDK)
329        self.docSDK.pkgs[pkgSDK.cfg.spdxID] = pkgSDK
330
331        # create DESCRIBES relationship data
332        rd = RelationshipData()
333        rd.ownerType = RelationshipDataElementType.DOCUMENT
334        rd.ownerDocument = self.docSDK
335        rd.otherType = RelationshipDataElementType.PACKAGEID
336        rd.otherPackageID = cfgPackageSDK.spdxID
337        rd.rlnType = "DESCRIBES"
338
339        # add it to pending relationships queue
340        self.pendingRelationships.append(rd)
341
342    def setupModulesDocument(self, modules):
343        # set up zephyr document
344        cfgModuleExtRef = DocumentConfig()
345        cfgModuleExtRef.name = "modules-deps"
346        cfgModuleExtRef.namespace = self.cfg.namespacePrefix + "/modules-deps"
347        cfgModuleExtRef.docRefID = "DocumentRef-modules-deps"
348        self.docModulesExtRefs = Document(cfgModuleExtRef)
349
350        for module in modules:
351            module_name = module.get("name", None)
352            module_security = module.get("security", None)
353
354            if not module_name:
355                log.err("cannot find module name in meta file; bailing")
356                return False
357
358            module_ext_ref = []
359            if module_security:
360                module_ext_ref = module_security.get("external-references")
361
362            # set up zephyr sources package
363            cfgPackageModuleExtRef = PackageConfig()
364            cfgPackageModuleExtRef.name = module_name + "-deps"
365            cfgPackageModuleExtRef.spdxID = "SPDXRef-" + module_name + "-deps"
366
367            for ref in module_ext_ref:
368                cfgPackageModuleExtRef.externalReferences.append(ref)
369
370            pkgModule = Package(cfgPackageModuleExtRef, self.docModulesExtRefs)
371            self.docModulesExtRefs.pkgs[pkgModule.cfg.spdxID] = pkgModule
372
373            self._add_describe_relationship(self.docModulesExtRefs, cfgPackageModuleExtRef)
374
375
376    # set up Documents before beginning
377    def setupDocuments(self):
378        log.dbg("setting up placeholder documents")
379
380        self.setupBuildDocument()
381
382        try:
383            with open(self.metaFile) as file:
384                content = yaml.load(file.read(), yaml.SafeLoader)
385                if not self.setupZephyrDocument(content["zephyr"], content["modules"]):
386                    return False
387        except (FileNotFoundError, yaml.YAMLError):
388            log.err("cannot find a valid zephyr_meta.yml required for SPDX generation; bailing")
389            return False
390
391        self.setupAppDocument()
392
393        if self.cfg.includeSDK:
394            self.setupSDKDocument()
395
396        self.setupModulesDocument(content["modules"])
397
398        return True
399
400    # walk through targets and gather information
401    def walkTargets(self):
402        log.dbg("walking targets from codemodel")
403
404        # assuming just one configuration; consider whether this is incorrect
405        cfgTargets = self.cm.configurations[0].configTargets
406        for cfgTarget in cfgTargets:
407            # build the Package for this target
408            pkg = self.initConfigTargetPackage(cfgTarget)
409
410            # see whether this target has any build artifacts at all
411            if len(cfgTarget.target.artifacts) > 0:
412                # add its build file
413                bf = self.addBuildFile(cfgTarget, pkg)
414                if pkg.cfg.name == "zephyr_final":
415                    pkg.cfg.primaryPurpose = "APPLICATION"
416                else:
417                    pkg.cfg.primaryPurpose = "LIBRARY"
418
419                # get its source files if build file is found
420                if bf:
421                    self.collectPendingSourceFiles(cfgTarget, pkg, bf)
422            else:
423                log.dbg(f"  - target {cfgTarget.name} has no build artifacts")
424
425            # get its target dependencies
426            self.collectTargetDependencies(cfgTargets, cfgTarget, pkg)
427
428    # build a Package in the Build doc for the given ConfigTarget
429    def initConfigTargetPackage(self, cfgTarget):
430        log.dbg(f"  - initializing Package for target: {cfgTarget.name}")
431
432        # create target Package's config
433        cfg = PackageConfig()
434        cfg.name = cfgTarget.name
435        cfg.spdxID = "SPDXRef-" + zspdx.spdxids.convertToSPDXIDSafe(cfgTarget.name)
436        cfg.relativeBaseDir = self.cm.paths_build
437
438        # build Package
439        pkg = Package(cfg, self.docBuild)
440
441        # add Package to build Document
442        self.docBuild.pkgs[cfg.spdxID] = pkg
443        return pkg
444
445    # create a target's build product File and add it to its Package
446    # call with:
447    #   1) ConfigTarget
448    #   2) Package for that target
449    # returns: File
450    def addBuildFile(self, cfgTarget, pkg):
451        # assumes only one artifact in each target
452        artifactPath = os.path.join(pkg.cfg.relativeBaseDir, cfgTarget.target.artifacts[0])
453        log.dbg(f"  - adding File {artifactPath}")
454        log.dbg(f"    - relativeBaseDir: {pkg.cfg.relativeBaseDir}")
455        log.dbg(f"    - artifacts[0]: {cfgTarget.target.artifacts[0]}")
456
457        # don't create build File if artifact path points to nonexistent file
458        if not os.path.exists(artifactPath):
459            log.dbg(f"  - target {cfgTarget.name} lists build artifact {artifactPath} "
460                    "but file not found after build; skipping")
461            return None
462
463        # create build File
464        bf = File(self.docBuild, pkg)
465        bf.abspath = artifactPath
466        bf.relpath = cfgTarget.target.artifacts[0]
467        # can use nameOnDisk b/c it is just the filename w/out directory paths
468        bf.spdxID = zspdx.spdxids.getUniqueFileID(cfgTarget.target.nameOnDisk,
469                                                  self.docBuild.timesSeen)
470        # don't fill hashes / licenses / rlns now, we'll do that after walking
471
472        # add File to Package
473        pkg.files[bf.spdxID] = bf
474
475        # add file path link to Document and global links
476        self.docBuild.fileLinks[bf.abspath] = bf
477        self.allFileLinks[bf.abspath] = self.docBuild
478
479        # also set this file as the target package's build product file
480        pkg.targetBuildFile = bf
481
482        return bf
483
484    # collect a target's source files, add to pending sources queue, and
485    # create pending relationship data entry
486    # call with:
487    #   1) ConfigTarget
488    #   2) Package for that target
489    #   3) build File for that target
490    def collectPendingSourceFiles(self, cfgTarget, pkg, bf):
491        log.dbg("  - collecting source files and adding to pending queue")
492
493        targetIncludesSet = set()
494
495        # walk through target's sources
496        for src in cfgTarget.target.sources:
497            log.dbg(f"    - add pending source file and relationship for {src.path}")
498            # get absolute path if we don't have it
499            srcAbspath = src.path
500            if not os.path.isabs(src.path):
501                srcAbspath = os.path.join(self.cm.paths_source, src.path)
502
503            # check whether it even exists
504            if not (os.path.exists(srcAbspath) and os.path.isfile(srcAbspath)):
505                log.dbg(f"  - {srcAbspath} does not exist but is referenced in sources for "
506                        f"target {pkg.cfg.name}; skipping")
507                continue
508
509            # add it to pending source files queue
510            self.pendingSources.append(srcAbspath)
511
512            # create relationship data
513            rd = RelationshipData()
514            rd.ownerType = RelationshipDataElementType.FILENAME
515            rd.ownerFileAbspath = bf.abspath
516            rd.otherType = RelationshipDataElementType.FILENAME
517            rd.otherFileAbspath = srcAbspath
518            rd.rlnType = "GENERATED_FROM"
519
520            # add it to pending relationships queue
521            self.pendingRelationships.append(rd)
522
523            # collect this source file's includes
524            if self.cfg.analyzeIncludes and self.compilerPath:
525                includes = self.collectIncludes(cfgTarget, pkg, bf, src)
526                for inc in includes:
527                    targetIncludesSet.add(inc)
528
529        # make relationships for the overall included files,
530        # avoiding duplicates for multiple source files including
531        # the same headers
532        targetIncludesList = list(targetIncludesSet)
533        targetIncludesList.sort()
534        for inc in targetIncludesList:
535            # add it to pending source files queue
536            self.pendingSources.append(inc)
537
538            # create relationship data
539            rd = RelationshipData()
540            rd.ownerType = RelationshipDataElementType.FILENAME
541            rd.ownerFileAbspath = bf.abspath
542            rd.otherType = RelationshipDataElementType.FILENAME
543            rd.otherFileAbspath = inc
544            rd.rlnType = "GENERATED_FROM"
545
546            # add it to pending relationships queue
547            self.pendingRelationships.append(rd)
548
549    # collect the include files corresponding to this source file
550    # call with:
551    #   1) ConfigTarget
552    #   2) Package for this target
553    #   3) build File for this target
554    #   4) TargetSource entry for this source file
555    # returns: sorted list of include files for this source file
556    def collectIncludes(self, cfgTarget, pkg, bf, src):
557        # get the right compile group for this source file
558        if len(cfgTarget.target.compileGroups) < (src.compileGroupIndex + 1):
559            log.dbg(f"    - {cfgTarget.target.name} has compileGroupIndex {src.compileGroupIndex} "
560                    f"but only {len(cfgTarget.target.compileGroups)} found; "
561                    "skipping included files search")
562            return []
563        cg = cfgTarget.target.compileGroups[src.compileGroupIndex]
564
565        # currently only doing C includes
566        if cg.language != "C":
567            log.dbg(f"    - {cfgTarget.target.name} has compile group language {cg.language} "
568                    "but currently only searching includes for C files; "
569                    "skipping included files search")
570            return []
571
572        srcAbspath = src.path
573        if src.path[0] != "/":
574            srcAbspath = os.path.join(self.cm.paths_source, src.path)
575        return getCIncludes(self.compilerPath, srcAbspath, cg)
576
577    # collect relationships for dependencies of this target Package
578    # call with:
579    #   1) all ConfigTargets from CodeModel
580    #   2) this particular ConfigTarget
581    #   3) Package for this Target
582    def collectTargetDependencies(self, cfgTargets, cfgTarget, pkg):
583        log.dbg(f"  - collecting target dependencies for {pkg.cfg.name}")
584
585        # walk through target's dependencies
586        for dep in cfgTarget.target.dependencies:
587            # extract dep name from its id
588            depFragments = dep.id.split(":")
589            depName = depFragments[0]
590            log.dbg(f"    - adding pending relationship for {depName}")
591
592            # create relationship data between dependency packages
593            rd = RelationshipData()
594            rd.ownerType = RelationshipDataElementType.TARGETNAME
595            rd.ownerTargetName = pkg.cfg.name
596            rd.otherType = RelationshipDataElementType.TARGETNAME
597            rd.otherTargetName = depName
598            rd.rlnType = "HAS_PREREQUISITE"
599
600            # add it to pending relationships queue
601            self.pendingRelationships.append(rd)
602
603            # if this is a target with any build artifacts (e.g. non-UTILITY),
604            # also create STATIC_LINK relationship for dependency build files,
605            # together with this Package's own target build file
606            if len(cfgTarget.target.artifacts) == 0:
607                continue
608
609            # find the filename for the dependency's build product, using the
610            # codemodel (since we might not have created this dependency's
611            # Package or File yet)
612            depAbspath = ""
613            for ct in cfgTargets:
614                if ct.name == depName:
615                    # skip utility targets
616                    if len(ct.target.artifacts) == 0:
617                        continue
618                    # all targets use the same relativeBaseDir, so this works
619                    # even though pkg is the owner package
620                    depAbspath = os.path.join(pkg.cfg.relativeBaseDir, ct.target.artifacts[0])
621                    break
622            if depAbspath == "":
623                continue
624
625            # create relationship data between build files
626            rd = RelationshipData()
627            rd.ownerType = RelationshipDataElementType.FILENAME
628            rd.ownerFileAbspath = pkg.targetBuildFile.abspath
629            rd.otherType = RelationshipDataElementType.FILENAME
630            rd.otherFileAbspath = depAbspath
631            rd.rlnType = "STATIC_LINK"
632
633            # add it to pending relationships queue
634            self.pendingRelationships.append(rd)
635
636    # walk through pending sources and create corresponding files,
637    # assigning them to the appropriate Document and Package
638    def walkPendingSources(self):
639        log.dbg("walking pending sources")
640
641        # only one package in each doc; get it
642        pkgZephyr = list(self.docZephyr.pkgs.values())[0]
643        pkgApp = list(self.docApp.pkgs.values())[0]
644        if self.cfg.includeSDK:
645            pkgSDK = list(self.docSDK.pkgs.values())[0]
646
647        for srcAbspath in self.pendingSources:
648            # check whether we've already seen it
649            srcDoc = self.allFileLinks.get(srcAbspath, None)
650            srcPkg = None
651            if srcDoc:
652                log.dbg(f"  - {srcAbspath}: already seen, assigned to {srcDoc.cfg.name}")
653                continue
654
655            # not yet assigned; figure out where it goes
656            pkgBuild = self.findBuildPackage(srcAbspath)
657            pkgZephyr = self.findZephyrPackage(srcAbspath)
658
659            if pkgBuild:
660                log.dbg(f"  - {srcAbspath}: assigning to build document, "
661                        f"package {pkgBuild.cfg.name}")
662                srcDoc = self.docBuild
663                srcPkg = pkgBuild
664            elif (
665                self.cfg.includeSDK
666                and os.path.commonpath([srcAbspath, pkgSDK.cfg.relativeBaseDir])
667                == pkgSDK.cfg.relativeBaseDir
668            ):
669                log.dbg(f"  - {srcAbspath}: assigning to sdk document")
670                srcDoc = self.docSDK
671                srcPkg = pkgSDK
672            elif (
673                os.path.commonpath([srcAbspath, pkgApp.cfg.relativeBaseDir])
674                == pkgApp.cfg.relativeBaseDir
675            ):
676                log.dbg(f"  - {srcAbspath}: assigning to app document")
677                srcDoc = self.docApp
678                srcPkg = pkgApp
679            elif pkgZephyr:
680                log.dbg(f"  - {srcAbspath}: assigning to zephyr document")
681                srcDoc = self.docZephyr
682                srcPkg = pkgZephyr
683            else:
684                log.dbg(f"  - {srcAbspath}: can't determine which document should own; skipping")
685                continue
686
687            # create File and assign it to the Package and Document
688            sf = File(srcDoc, srcPkg)
689            sf.abspath = srcAbspath
690            sf.relpath = os.path.relpath(srcAbspath, srcPkg.cfg.relativeBaseDir)
691            filenameOnly = os.path.split(srcAbspath)[1]
692            sf.spdxID = zspdx.spdxids.getUniqueFileID(filenameOnly, srcDoc.timesSeen)
693            # don't fill hashes / licenses / rlns now, we'll do that after walking
694
695            # add File to Package
696            srcPkg.files[sf.spdxID] = sf
697
698            # add file path link to Document and global links
699            srcDoc.fileLinks[sf.abspath] = sf
700            self.allFileLinks[sf.abspath] = srcDoc
701
    # figure out which Package in the given Document contains the given file, if any
    # call with:
    #   1) Document whose Packages should be searched
    #   2) absolute path for source filename being searched
705    def findPackageFromSrcAbsPath(self, document, srcAbspath):
706        # Multiple target Packages might "contain" the file path, if they
707        # are nested. If so, the one with the longest path would be the
708        # most deeply-nested target directory, so that's the one which
709        # should get the file path.
710        pkgLongestMatch = None
711        for pkg in document.pkgs.values():
712            if os.path.commonpath([srcAbspath, pkg.cfg.relativeBaseDir]) == pkg.cfg.relativeBaseDir:
713                # the package does contain this file; is it the deepest?
714                if pkgLongestMatch:
715                    if len(pkg.cfg.relativeBaseDir) > len(pkgLongestMatch.cfg.relativeBaseDir):
716                        pkgLongestMatch = pkg
717                else:
718                    # first package containing it, so assign it
719                    pkgLongestMatch = pkg
720
721        return pkgLongestMatch
722
723    def findBuildPackage(self, srcAbspath):
724        return self.findPackageFromSrcAbsPath(self.docBuild, srcAbspath)
725
726    def findZephyrPackage(self, srcAbspath):
727        return self.findPackageFromSrcAbsPath(self.docZephyr, srcAbspath)
728
729    # walk through pending RelationshipData entries, create corresponding
730    # Relationships, and assign them to the applicable Files / Packages
731    def walkRelationships(self):
732        for rlnData in self.pendingRelationships:
733            rln = Relationship()
734            # get left side of relationship data
735            docA, spdxIDA, rlnsA = self.getRelationshipLeft(rlnData)
736            if not docA or not spdxIDA:
737                continue
738            rln.refA = spdxIDA
739            # get right side of relationship data
740            spdxIDB = self.getRelationshipRight(rlnData, docA)
741            if not spdxIDB:
742                continue
743            rln.refB = spdxIDB
744            rln.rlnType = rlnData.rlnType
745            rlnsA.append(rln)
746            log.dbg(
747                f"  - adding relationship to {docA.cfg.name}: {rln.refA} {rln.rlnType} {rln.refB}"
748            )
749
750    # get owner (left side) document and SPDX ID of Relationship for given RelationshipData
751    # returns: doc, spdxID, rlnsArray (for either Document, Package, or File, as applicable)
752    def getRelationshipLeft(self, rlnData):
753        if rlnData.ownerType == RelationshipDataElementType.FILENAME:
754            # find the document for this file abspath, and then the specific file's ID
755            ownerDoc = self.allFileLinks.get(rlnData.ownerFileAbspath, None)
756            if not ownerDoc:
757                log.dbg(
758                    "  - searching for relationship, can't find document with file "
759                    f"{rlnData.ownerFileAbspath}; skipping"
760                )
761                return None, None, None
762            sf = ownerDoc.fileLinks.get(rlnData.ownerFileAbspath, None)
763            if not sf:
764                log.dbg(
765                    f"  - searching for relationship for file {rlnData.ownerFileAbspath} "
766                    f"points to document {ownerDoc.cfg.name} but file not found; skipping"
767                )
768                return None, None, None
769            # found it
770            if not sf.spdxID:
771                log.dbg(
772                    f"  - searching for relationship for file {rlnData.ownerFileAbspath} "
773                    "found file, but empty ID; skipping"
774                )
775                return None, None, None
776            return ownerDoc, sf.spdxID, sf.rlns
777        elif rlnData.ownerType == RelationshipDataElementType.TARGETNAME:
778            # find the document for this target name, and then the specific package's ID
779            # for target names, must be docBuild
780            ownerDoc = self.docBuild
781            # walk through target Packages and check names
782            for pkg in ownerDoc.pkgs.values():
783                if pkg.cfg.name == rlnData.ownerTargetName:
784                    if not pkg.cfg.spdxID:
785                        log.dbg(
786                            "  - searching for relationship for target "
787                            f"{rlnData.ownerTargetName} found package, but empty ID; skipping"
788                        )
789                        return None, None, None
790                    return ownerDoc, pkg.cfg.spdxID, pkg.rlns
791            log.dbg(
792                f"  - searching for relationship for target {rlnData.ownerTargetName}, "
793                "target not found in build document; skipping"
794            )
795            return None, None, None
796        elif rlnData.ownerType == RelationshipDataElementType.DOCUMENT:
797            # will always be SPDXRef-DOCUMENT
798            return rlnData.ownerDocument, "SPDXRef-DOCUMENT", rlnData.ownerDocument.relationships
799        else:
800            log.dbg(f"  - unknown relationship type {rlnData.ownerType}; skipping")
801            return None, None, None
802
803    # get other (right side) SPDX ID of Relationship for given RelationshipData
804    def getRelationshipRight(self, rlnData, docA):
805        if rlnData.otherType == RelationshipDataElementType.FILENAME:
806            # find the document for this file abspath, and then the specific file's ID
807            otherDoc = self.allFileLinks.get(rlnData.otherFileAbspath, None)
808            if not otherDoc:
809                log.dbg(
810                    "  - searching for relationship, can't find document with file "
811                    f"{rlnData.otherFileAbspath}; skipping"
812                )
813                return None
814            bf = otherDoc.fileLinks.get(rlnData.otherFileAbspath, None)
815            if not bf:
816                log.dbg(
817                    f"  - searching for relationship for file {rlnData.otherFileAbspath} "
818                    f"points to document {otherDoc.cfg.name} but file not found; skipping"
819                )
820                return None
821            # found it
822            if not bf.spdxID:
823                log.dbg(
824                    f"  - searching for relationship for file {rlnData.otherFileAbspath} "
825                    "found file, but empty ID; skipping"
826                )
827                return None
828            # figure out whether to append DocumentRef
829            spdxIDB = bf.spdxID
830            if otherDoc != docA:
831                spdxIDB = otherDoc.cfg.docRefID + ":" + spdxIDB
832                docA.externalDocuments.add(otherDoc)
833            return spdxIDB
834        elif rlnData.otherType == RelationshipDataElementType.TARGETNAME:
835            # find the document for this target name, and then the specific package's ID
836            # for target names, must be docBuild
837            otherDoc = self.docBuild
838            # walk through target Packages and check names
839            for pkg in otherDoc.pkgs.values():
840                if pkg.cfg.name == rlnData.otherTargetName:
841                    if not pkg.cfg.spdxID:
842                        log.dbg(
843                            f"  - searching for relationship for target {rlnData.otherTargetName}"
844                            " found package, but empty ID; skipping"
845                        )
846                        return None
847                    spdxIDB = pkg.cfg.spdxID
848                    if otherDoc != docA:
849                        spdxIDB = otherDoc.cfg.docRefID + ":" + spdxIDB
850                        docA.externalDocuments.add(otherDoc)
851                    return spdxIDB
852            log.dbg(
853                f"  - searching for relationship for target {rlnData.otherTargetName}, "
854                "target not found in build document; skipping"
855            )
856            return None
857        elif rlnData.otherType == RelationshipDataElementType.PACKAGEID:
858            # will just be the package ID that was passed in
859            return rlnData.otherPackageID
860        else:
861            log.dbg(f"  - unknown relationship type {rlnData.otherType}; skipping")
862            return None
863