Adding tarfile member sanitization to extractall() (#1709)

Fix a widespread bug named CVE-2007-4559, which is a 15 year old bug
in the Python tarfile package. By using extract() or extractall() on a tarfile
object without sanitizing input, a maliciously crafted .tar file could
perform a directory path traversal attack. This patch essentially checks to
see if all tarfile members will be extracted safely and throws an exception
otherwise.
This commit is contained in:
TrellixVulnTeam 2022-11-16 21:52:30 -06:00 committed by GitHub
parent 6c16ff7654
commit 7ad3412591
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -111,7 +111,26 @@ def unpack(tar_file, strip_prefix, dest_dir):
with tempfile.TemporaryDirectory() as tmp:
with tarfile.open(tar_file) as tar:
logger.debug(f"extract to {tmp}")
tar.extractall(tmp)
def is_within_directory(directory, target):
abs_directory = os.path.abspath(directory)
abs_target = os.path.abspath(target)
prefix = os.path.commonprefix([abs_directory, abs_target])
return prefix == abs_directory
def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
for member in tar.getmembers():
member_path = os.path.join(path, member.name)
if not is_within_directory(path, member_path):
raise Exception("Attempted Path Traversal in Tar File")
tar.extractall(path, members, numeric_owner=numeric_owner)
safe_extract(tar, tmp)
strip_prefix_dir = (
pathlib.Path(tmp).joinpath(strip_prefix + os.path.sep).resolve()