Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion Lib/tarfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -2360,11 +2360,14 @@ def addfile(self, tarinfo, fileobj=None):
raise ValueError("fileobj not provided for non zero-size regular file")

tarinfo = copy.copy(tarinfo)

# get current offset
tarinfo.offset = self.offset
buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
self.fileobj.write(buf)
self.offset += len(buf)
# self.offset now points just past the header, where the member's data begins
bufsize=self.copybufsize
tarinfo.offset_data = self.offset
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi there, this seems wrong. You are replacing the information about where the file data starts with the information about where the file header starts.

Copy link
Copy Markdown
Author

@grantlouisherman grantlouisherman May 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could be incorrect in my understanding, but offset records where the archive's current offset pointer is when addfile() is called, i.e. where this member's header starts. We then set offset_data after the length of the header buffer has been added to self.offset, so it points to the start of the data. The offset update on line 2378 then adds the block size, so when we add another file we start pointing at the end of the previous block.

# If there's data to follow, append it.
if fileobj is not None:
copyfileobj(fileobj, self.fileobj, tarinfo.size, bufsize=bufsize)
Expand Down
23 changes: 23 additions & 0 deletions Lib/test/test_tarfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -1485,6 +1485,29 @@ class WriteTest(WriteTestBase, unittest.TestCase):

prefix = "w:"

def test_addfile_sets_offsets(self):
    # gh-150075: addfile() must set offset and offset_data on the
    # TarInfo stored in the archive so they match a subsequent read.
    data = b"data"
    names = ["test1.txt", "test2.txt"]

    with tarfile.open(tmpname, self.mode) as tar:
        for name in names:
            info = tarfile.TarInfo(name)
            info.size = len(data)
            tar.addfile(info, io.BytesIO(data))

        # Members as recorded on the writing side.
        write_members = tar.getmembers()

    with tarfile.open(tmpname) as tar:
        read_members = tar.getmembers()

    # zip() stops at the shorter sequence, so pin both member lists
    # first; otherwise a missing member would let the offset checks
    # below pass vacuously.
    self.assertEqual([m.name for m in write_members], names)
    self.assertEqual([m.name for m in read_members], names)

    for w, r in zip(write_members, read_members):
        # subTest reports which member disagrees instead of stopping
        # at the first mismatch.
        with self.subTest(member=w.name):
            self.assertEqual(w.offset, r.offset)
            self.assertEqual(w.offset_data, r.offset_data)

def test_100_char_name(self):
# The name field in a tar header stores strings of at most 100 chars.
# If a string is shorter than 100 chars it has to be padded with '\0',
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
tar.addfile() doesn't set member offsets in 3.15. From reviewing the file, it seemed that the copy of the TarInfo object made by addfile() was not given the proper offsets. The fix sets the member's offset to the TarFile's current offset when addfile() is called, and sets offset_data to the offset after the header produced by tobuf() has been written and its length added.
Loading