#-------------------------------------------------------------------
# tarfile.py
#-------------------------------------------------------------------
# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
# All rights reserved.
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
"""Read from and write to tar format archives.
"""
# The docstring has to be the first statement of the module to become its
# __doc__; a __future__ import is still valid directly after it.
from __future__ import print_function
# Module metadata.
# NOTE(review): the "$Revision$", "$Date: ...$" and "$Id: ...$" values look
# like version-control keyword placeholders -- confirm whether anything
# still expands them.
__version__ = "$Revision$"
version = "0.9.0"
__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)"
__date__ = "$Date: 2011-02-25 17:42:01 +0200 (Fri, 25 Feb 2011) $"
__cvsid__ = "$Id: tarfile.py 88586 2011-02-25 15:42:01Z marc-andre.lemburg $"
__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
#---------
# Imports
#---------
import sys
import os
import stat
import errno
import time
import struct
import copy
import re
try:
import grp, pwd
except ImportError:
grp = pwd = None
# Tuple of exception types associated with a failing os.symlink call.
# os.symlink on Windows prior to 6.0 raises NotImplementedError
symlink_exception = (AttributeError, NotImplementedError)
try:
    # WindowsError (1314) will be raised if the caller does not hold the
    # SeCreateSymbolicLinkPrivilege privilege
    symlink_exception += (WindowsError,)
except NameError:
    # The name WindowsError is not defined here, so the tuple is left as is.
    pass
# from tarfile import *
# (__all__ lists the names exported by such a star-import.)
__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
# The builtins module is imported under the name __builtin__ when the
# interpreter's major version is below 3.
if sys.version_info[0] < 3:
    import __builtin__ as builtins
else:
    import builtins
# Keep a private reference to the builtin open().
_open = builtins.open   # Since 'open' is TarFile.open
#---------------------------------------------------------
# tar constants
#---------------------------------------------------------
NUL = b"\0"                     # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
# Both magic strings must be exactly 8 bytes long: they fill the magic and
# version area of a header.  The two spaces of the GNU magic are written as
# \x20 escapes so that whitespace normalisation can never shorten the value.
GNU_MAGIC = b"ustar\x20\x20\0"  # magic gnu tar string ("ustar", 2 spaces, NUL)
POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string ("ustar", NUL, "00")
LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field
# Single-byte codes identifying the type of an archive member.
REGTYPE = b"0"                  # regular file
AREGTYPE = b"\0"                # regular file
LNKTYPE = b"1"                  # link (inside tarfile)
SYMTYPE = b"2"                  # symbolic link
CHRTYPE = b"3"                  # character special device
BLKTYPE = b"4"                  # block special device
DIRTYPE = b"5"                  # directory
FIFOTYPE = b"6"                 # fifo special device
CONTTYPE = b"7"                 # contiguous file
# Codes used by GNU tar.
GNUTYPE_LONGNAME = b"L"         # GNU tar longname
GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
GNUTYPE_SPARSE = b"S"           # GNU tar sparse file
# Codes for extended (pax / Solaris) headers.
XHDTYPE = b"x"                  # POSIX.1-2001 extended header
XGLTYPE = b"g"                  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"          # Solaris extended header
# Identifiers of the archive formats; DEFAULT_FORMAT selects the GNU one.
USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT
#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)
# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)
# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)
# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")
# Fields from a pax header that are affected by hdrcharset.
PAX_NAME_FIELDS = set(("path", "linkpath", "uname", "gname"))
# Fields in a pax header that are numbers, all other fields
# are treated as strings.  Each key maps to the callable (float or int)
# associated with that field.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int
}
#---------------------------------------------------------
# Bits used in the mode field, values in octal.
#---------------------------------------------------------
# File type bits.
S_IFLNK = 0o120000        # symbolic link
S_IFREG = 0o100000        # regular file
S_IFBLK = 0o060000        # block device
S_IFDIR = 0o040000        # directory
S_IFCHR = 0o020000        # character device
S_IFIFO = 0o010000        # fifo
# Special bits.
TSUID   = 0o4000          # set UID on execution
TSGID   = 0o2000          # set GID on execution
TSVTX   = 0o1000          # reserved
# Permission bits for owner, group and other.
TUREAD  = 0o400           # read by owner
TUWRITE = 0o200           # write by owner
TUEXEC  = 0o100           # execute/search by owner
TGREAD  = 0o040           # read by group
TGWRITE = 0o020           # write by group
TGEXEC  = 0o010           # execute/search by group
TOREAD  = 0o004           # read by other
TOWRITE = 0o002           # write by other
TOEXEC  = 0o001           # execute/search by other
#---------------------------------------------------------
# initialization
#---------------------------------------------------------
# Default encoding: "utf-8" when os.name is "nt" or "ce", otherwise the
# filesystem encoding reported by the interpreter.
if os.name in ("nt", "ce"):
    ENCODING = "utf-8"
else:
    # sys.getfilesystemencoding() may return None on Python 2; fall back to
    # the default encoding so that ENCODING is always a usable codec name.
    ENCODING = sys.getfilesystemencoding() or sys.getdefaultencoding()
#---------------------------------------------------------
# Some useful functions
#---------------------------------------------------------
def stn(s, length, encoding, errors):
    """Encode the string s and fit it into a field of length bytes.

    The encoded value is cut off after length bytes; a shorter value is
    filled up with NUL bytes.
    """
    encoded = s.encode(encoding, errors)
    # A negative repeat count yields an empty padding.
    padding = NUL * (length - len(encoded))
    return encoded[:length] + padding
def nts(s, encoding, errors):
    """Decode the part of the bytes object s that precedes the first NUL.

    If s contains no NUL byte, all of it is decoded.
    """
    head, _sep, _tail = s.partition(b"\0")
    return head.decode(encoding, errors)
def nti(s):
    """Convert a number field to a python number.

    There are two possible encodings for a number field, see itn():
    a string of octal digits, or a leading 0o200 byte followed by a
    big-endian binary representation of the number.

    Raises InvalidHeaderError if an octal field cannot be parsed.
    """
    # Compare a one-byte slice instead of s[0]: indexing a bytes object
    # yields an int on Python 3 (which never equals a str), whereas a slice
    # is a bytes object on both Python 2 and Python 3.
    if s[0:1] != b"\x80":
        try:
            n = int(nts(s, "ascii", "strict") or "0", 8)
        except ValueError:
            raise InvalidHeaderError("invalid header")
    else:
        n = 0
        # Iterating over a bytearray yields ints on Python 2 and 3 alike.
        for byte in bytearray(s[1:]):
            n = (n << 8) + byte
    return n
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field.

    POSIX 1003.1-1988 requires numbers to be encoded as a string of octal
    digits followed by a null-byte, which allows values up to
    (8**(digits-1))-1. GNU tar allows storing greater numbers if
    necessary: a leading 0o200 byte marks that encoding and the following
    digits-1 bytes hold a big-endian representation, which allows values
    up to (256**(digits-1))-1.

    Raises ValueError if the number does not fit into the field or needs
    the GNU encoding while format is not GNU_FORMAT.
    """
    width = digits - 1
    if 0 <= n < 8 ** width:
        return ("%0*o" % (width, n)).encode("ascii") + NUL

    if format != GNU_FORMAT or n >= 256 ** width:
        raise ValueError("overflow in number field")

    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # this could raise OverflowError.
        n = struct.unpack("L", struct.pack("l", n))[0]

    # Collect the bytes least significant first, add the marker byte and
    # turn the whole field around at the end.
    field = bytearray()
    for _ in range(width):
        field.append(n & 0o377)
        n >>= 8
    field.append(0o200)
    field.reverse()
    return field
def calc_chksums(buf):
    """Return the (unsigned, signed) checksums of a member's header.

    All bytes of the first 512 bytes of buf are summed up, except for the
    8 bytes of the chksum field (offsets 148 to 155) which are counted as
    if they were filled with spaces. According to the GNU tar sources,
    some tars (Sun and NeXT) calculate chksum with signed char, which
    gives a different result if there are bytes with the high bit set.
    That is why both an unsigned and a signed sum are computed.
    """
    # 8 spaces (8 * 0x20) stand in for the chksum field.
    blank_chksum = 256
    # "8x" skips the chksum field without copying slices of buf.
    unsigned_chksum = blank_chksum + sum(struct.unpack_from("148B8x356B", buf))
    signed_chksum = blank_chksum + sum(struct.unpack_from("148b8x356b", buf))
    return unsigned_chksum, signed_chksum
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.

    If length is None, everything up to the end of src is copied.
    Raises IOError if src ends before length bytes have been read.
    """
    chunk_size = 16 * 1024

    if length == 0:
        return

    if length is None:
        # Unbounded copy: read until src is exhausted.
        data = src.read(chunk_size)
        while data:
            dst.write(data)
            data = src.read(chunk_size)
        return

    def transfer(count):
        # Move exactly count bytes; a short read means src ended too early.
        data = src.read(count)
        if len(data) < count:
            raise IOError("end of file reached")
        dst.write(data)

    full_chunks, tail = divmod(length, chunk_size)
    for _ in range(full_chunks):
        transfer(chunk_size)
    if tail != 0:
        transfer(tail)
# Lookup table read by filemode(): one inner tuple per character of the
# resulting string. Within an inner tuple the (bits, char) pairs are tried
# in order and the first pair whose bits are all set in the mode wins, so
# combined masks are listed before the masks they contain.
filemode_table = (
    # file type
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    # owner
    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    # group
    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    # other
    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x"))
)
def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()

       Every entry of filemode_table contributes one character: the
       character of its first (bits, char) pair whose bits are all set
       in mode, or "-" if no pair matches.
    """
    return "".join(
        next((char for bit, char in table if mode & bit == bit), "-")
        for table in filemode_table
    )
class TarError(Exception):
    """Base class of the exceptions defined in this module."""
class ExtractError(TarError):
    """General error raised for problems with extraction."""
class ReadError(TarError):
    """Error raised for tar archives that cannot be read."""
class CompressionError(TarError):
    """Error raised for compression methods that are not available."""
class StreamError(TarError):
    """Error raised for operations that stream-like TarFiles do not support."""
class HeaderError(TarError):
    """Common base class of the header related errors."""
class EmptyHeaderError(HeaderError):
Loading ...