blob: 46688207509c244241ee9a2dee56220a9675d558 [file] [log] [blame]
# Copyright (c) 2020, 2021 The Linux Foundation
#
# SPDX-License-Identifier: Apache-2.0
import re
def getSPDXIDSafeCharacter(c):
    """
    Converts a character to an SPDX-ID-safe character.

    Only ASCII letters and digits count as safe. str.isalpha() and
    str.isdigit() also return True for non-ASCII characters (for example
    accented letters or superscript digits), which are not valid in an
    SPDX identifier, so an explicit ASCII check is applied as well.

    Arguments:
        - c: character to test
    Returns: c if it is SPDX-ID-safe (ASCII letter, ASCII number, '-'
             or '.'); '-' otherwise
    """
    if c == "-" or c == ".":
        return c
    # ord() < 128 restricts the Unicode-aware isalpha()/isdigit() checks
    # to plain ASCII without relying on str.isascii() (Python 3.7+).
    if (c.isalpha() or c.isdigit()) and all(ord(ch) < 128 for ch in c):
        return c
    return "-"
def convertToSPDXIDSafe(s):
    """
    Maps every character of a filename (or any other string) onto the
    SPDX-ID-safe character set.

    Different inputs can collapse to the same output, so the result is
    not guaranteed to be unique on its own; a separate check (such as
    the one in getUniqueFileID) is needed when a unique identifier is
    required.

    Arguments:
        - s: string to be converted.
    Returns: string with every non-safe character replaced by a dash.
    """
    return "".join(map(getSPDXIDSafeCharacter, s))
def getUniqueFileID(filenameOnly, timesSeen):
    """
    Build an SPDX file ID that does not clash with any ID handed out so
    far for the same (converted) filename.

    Arguments:
        - filenameOnly: filename only (directories omitted) seeking ID.
        - timesSeen: dict mapping each converted filename to the number
                     of times it has been seen.
    Returns: unique SPDX ID; timesSeen is updated in place to record
             this occurrence.
    """
    safeName = convertToSPDXIDSafe(filenameOnly)

    # record this occurrence up front; the count drives the suffix below
    occurrence = timesSeen.get(safeName, 0) + 1
    timesSeen[safeName] = occurrence

    baseID = f"SPDXRef-File-{safeName}"

    # repeat sighting: disambiguate by appending the occurrence number
    if occurrence > 1:
        return f"{baseID}-{occurrence}"

    # first sighting, but the name itself already ends in "-{number}":
    # append "-1" so it cannot overlap with the numbered ID of a
    # similarly-named file that was seen several times.
    if re.search(r"-\d+$", safeName):
        return baseID + "-1"

    # first sighting of an ordinary name: the base ID is already unique
    return baseID