| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345 |
- #
- # Copyright (C) 2008 The Android Open Source Project
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import stat
- import struct
- import zlib
- import cStringIO
- from import_ext import ImportExternal
- from error import ImportError
class ImportZip(ImportExternal):
    """Importer that streams a zip (or jar) archive from a URL.

    Each archive member is read as it arrives and handed to the
    inherited unpacking hooks, so the archive is never stored whole.
    """

    @classmethod
    def CanAccept(cls, url):
        """Return True when url names a '.zip' or '.jar' resource."""
        return url.endswith('.zip') or url.endswith('.jar')

    def _UnpackFiles(self):
        """Unpack every member of the archive behind the opened URL.

        Raises ImportError when the resolved URL does not carry a zip or
        jar extension.  The URL handle is always closed on the way out.
        """
        url_fd, url = self._OpenUrl()
        try:
            if not self.__class__.CanAccept(url):
                raise ImportError('non-zip file extension: %s' % url)

            archive = _ZipFile(url_fd)

            # Pass 1: the local file records, in stream order.
            for entry in archive.FileRecords():
                payload = archive.Open(entry).read()
                if payload and _SafeCRLF(payload):
                    # Text-like content: normalize line endings to LF.
                    payload = payload.replace('\r\n', '\n')
                size = len(payload)
                payload_fd = cStringIO.StringIO(payload)
                self._UnpackOneFile(entry.mode, size, entry.name, payload_fd)
                archive.Close(entry)

            # Pass 2: the central directory carries the file modes.
            for entry in archive.CentralDirectory():
                self._SetFileMode(entry.name, entry.mode)

            archive.CheckTail()
        finally:
            url_fd.close()
def _SafeCRLF(data):
    r"""Decide whether a CRLF->LF conversion of data is reasonably safe.

    A NUL byte anywhere except the very last position marks the data as
    likely binary, so it is rejected.  A single trailing NUL is
    tolerated because at least one source ZIP file has that structure
    in its source code.

    Beyond that, the data qualifies only if it contains at least one LF
    and every LF is immediately preceded by a CR.  In that case
    s/\r\n/\n/g can be undone again with s/\n/\r\n/g.
    """
    first_nul = data.find('\0')
    if first_nul != -1 and first_nul != len(data) - 1:
        return False

    # Every "\r\n" holds exactly one "\n", so the two counts agree
    # precisely when no LF stands without a CR in front of it.
    lf_total = data.count('\n')
    crlf_total = data.count('\r\n')
    return lf_total > 0 and lf_total == crlf_total
class _ZipFile(object):
    """Streaming iterator to parse a zip file on the fly.

    The archive is consumed strictly front to back: first the local
    file records (FileRecords, with Open/Close around each payload),
    then the central directory (CentralDirectory), and finally the
    end-of-central-directory marker (CheckTail).
    """

    def __init__(self, fd):
        # Wrap the source so look-ahead bytes can be pushed back.
        self._fd = _UngetStream(fd)

    def FileRecords(self):
        """Return an iterator over the local file headers."""
        return _FileIter(self._fd)

    def CentralDirectory(self):
        """Return an iterator over the central directory headers."""
        return _CentIter(self._fd)

    def CheckTail(self):
        """Verify that the next record is the end of central directory.

        Raises ImportError if the stream ends early or a different
        record signature is found.
        """
        signature = struct.unpack('<I', self._ReadExact(4))[0]
        if signature != 0x06054b50:  # end of central directory
            raise ImportError('zip record %x unsupported' % signature)

    def Open(self, entry):
        """Return a file-like reader positioned on entry's payload.

        Raises ImportError for a stored (uncompressed) entry whose size
        is only known from a trailing data descriptor, since its end
        cannot be located while streaming.
        """
        if entry.is_compressed:
            return _InflateStream(self._fd)
        if entry.has_trailer:
            raise ImportError('unable to extract streamed zip')
        return _FixedLengthStream(self._fd, entry.uncompressed_size)

    def Close(self, entry):
        """Skip the data descriptor that follows entry's payload, if any.

        Raises ImportError if the stream ends inside the descriptor.
        """
        if not entry.has_trailer:
            return
        signature = struct.unpack('<I', self._ReadExact(4))[0]
        if signature == 0x08074b50:
            # Not a formal type marker, but commonly seen in zips
            # as the data descriptor signature; three 32-bit fields
            # follow it.
            self._ReadExact(12)
        else:
            # No signature for the data descriptor: the 4 bytes just
            # read were its first field, so skip the remaining two.
            self._ReadExact(8)

    def _ReadExact(self, size):
        """Read exactly size bytes or raise ImportError on a short read."""
        buf = self._fd.read(size)
        if len(buf) != size:
            raise ImportError('truncated zip: expected %d bytes, got %d'
                              % (size, len(buf)))
        return buf
- class _FileIter(object):
- def __init__(self, fd):
- self._fd = fd
- def __iter__(self):
- return self
- def next(self):
- fd = self._fd
- type_buf = fd.read(4)
- type = struct.unpack('<I', type_buf)[0]
- if type != 0x04034b50: # local file header
- fd.unread(type_buf)
- raise StopIteration()
- rec = _FileHeader(fd.read(26))
- rec.name = fd.read(rec.name_len)
- fd.read(rec.extra_len)
- if rec.name.endswith('/'):
- rec.name = rec.name[:-1]
- rec.mode = stat.S_IFDIR | 0777
- return rec
class _FileHeader(object):
    """Decoded fixed-size part of one local file header.

    Layout of the 26 bytes handed to the constructor (little endian):

      0  version needed to extract   2 bytes
      1  general purpose bit flag    2 bytes
      2  compression method          2 bytes
      3  last mod file time          2 bytes
      4  last mod file date          2 bytes
      5  crc-32                      4 bytes
      6  compressed size             4 bytes
      7  uncompressed size           4 bytes
      8  file name length            2 bytes
      9  extra field length          2 bytes
    """

    def __init__(self, raw_bin):
        (_version, flags, method, _mod_time, _mod_date, _crc,
         packed_size, plain_size,
         name_len, extra_len) = struct.unpack('<5H3I2H', raw_bin)

        # Only "stored" (0) and "deflated" (8) are understood.
        if method not in (0, 8):
            raise ImportError('unrecognized compression format')
        self.is_compressed = (method == 8)

        # Bit 3 of the flags: a data descriptor trails the payload.
        self.has_trailer = bool(flags & (1 << 3))

        self.compressed_size = packed_size
        self.uncompressed_size = plain_size
        self.name_len = name_len
        self.extra_len = extra_len

        # Default to a regular file with rw-r--r-- permissions.
        self.mode = (stat.S_IFREG
                     | stat.S_IRUSR | stat.S_IWUSR
                     | stat.S_IRGRP | stat.S_IROTH)
- class _CentIter(object):
- def __init__(self, fd):
- self._fd = fd
- def __iter__(self):
- return self
- def next(self):
- fd = self._fd
- type_buf = fd.read(4)
- type = struct.unpack('<I', type_buf)[0]
- if type != 0x02014b50: # central directory
- fd.unread(type_buf)
- raise StopIteration()
- rec = _CentHeader(fd.read(42))
- rec.name = fd.read(rec.name_len)
- fd.read(rec.extra_len)
- fd.read(rec.comment_len)
- if rec.name.endswith('/'):
- rec.name = rec.name[:-1]
- rec.mode = stat.S_IFDIR | 0777
- return rec
class _CentHeader(object):
    """Decoded fixed-size part of one central directory header.

    Layout of the 42 bytes handed to the constructor (little endian):

      0  version made by                  2 bytes
      1  version needed to extract        2 bytes
      2  general purpose bit flag         2 bytes
      3  compression method               2 bytes
      4  last mod file time               2 bytes
      5  last mod file date               2 bytes
      6  crc-32                           4 bytes
      7  compressed size                  4 bytes
      8  uncompressed size                4 bytes
      9  file name length                 2 bytes
      10 extra field length               2 bytes
      11 file comment length              2 bytes
      12 disk number start                2 bytes
      13 internal file attributes         2 bytes
      14 external file attributes         4 bytes
      15 relative offset of local header  4 bytes
    """

    def __init__(self, raw_bin):
        fields = struct.unpack('<6H3I5H2I', raw_bin)
        made_by = fields[0]
        external_attrs = fields[14]

        self.name_len, self.extra_len, self.comment_len = fields[9:12]

        # The high byte of "version made by" names the host system.
        if (made_by >> 8) == 0x03:  # UNIX
            # The file mode lives in the upper 16 bits of the attributes.
            self.mode = external_attrs >> 16
        else:
            # Otherwise assume a regular file with rw-r--r-- permissions.
            self.mode = (stat.S_IFREG
                         | stat.S_IRUSR | stat.S_IWUSR
                         | stat.S_IRGRP | stat.S_IROTH)
class _UngetStream(object):
    """File like object to read and rewind a stream.

    Bytes handed to unread() are returned again by the next read().
    Subclasses may override _Inflate() to filter each raw chunk pulled
    from the wrapped stream.
    """

    def __init__(self, fd):
        self._fd = fd
        self._buf = None  # pushed-back or not yet consumed data

    def read(self, size = -1):
        """Read up to size bytes, or everything when size is negative.

        Fewer bytes than requested (possibly none) are returned once the
        stream is exhausted.
        """
        r = []
        try:
            if size >= 0:
                self._ReadChunk(r, size)
            else:
                while True:
                    self._ReadChunk(r, 2048)
        except EOFError:
            # End of stream: return whatever was gathered so far.
            pass
        if len(r) == 1:
            return r[0]
        return ''.join(r)

    def unread(self, buf):
        """Push buf back so that it is returned by the next read()."""
        pending = self._buf
        if pending:
            self._buf = buf + pending
        else:
            self._buf = buf

    def _ReadChunk(self, r, size):
        """Append pieces totalling size bytes to the list r.

        Raises EOFError when the stream ends first; pieces gathered up
        to that point stay in r.
        """
        pending = self._buf
        try:
            while size > 0:
                if not pending:
                    raw = self._fd.read(2048)
                    pending = self._Inflate(raw)
                    # An empty result alone does not mean end of stream:
                    # a filter may buffer input and emit nothing for one
                    # chunk.  Stop only when the filter says so (None)
                    # or the wrapped stream itself has run dry.
                    if pending is None or (not pending and not raw):
                        raise EOFError()
                    continue
                use = min(size, len(pending))
                r.append(pending[:use])
                pending = pending[use:]
                size -= use
        finally:
            # Keep the unconsumed remainder, even on the EOF path.
            self._buf = pending

    def _Inflate(self, b):
        """Filter one raw chunk; the base class passes it through.

        Overrides return None to signal the end of the logical stream.
        """
        return b
class _FixedLengthStream(_UngetStream):
    """File like object exposing exactly `have` bytes of a stream.

    Anything pulled from the wrapped stream beyond that budget is
    pushed back onto it.
    """

    def __init__(self, fd, have):
        _UngetStream.__init__(self, fd)
        self._have = have  # bytes still owed to the reader

    def _Inflate(self, b):
        remaining = self._have
        if remaining == 0:
            # Budget used up: return the chunk and report the end.
            self._fd.unread(b)
            return None

        if len(b) > remaining:
            # Keep only our share; the rest belongs to the next record.
            self._fd.unread(b[remaining:])
            b = b[:remaining]

        self._have = remaining - len(b)
        return b
class _InflateStream(_UngetStream):
    """Decompresses a raw deflate stream as its input is read.

    Bytes that zlib reports as lying past the end of the compressed
    data are pushed back onto the wrapped stream.
    """

    def __init__(self, fd):
        _UngetStream.__init__(self, fd)
        # Negative window bits: raw deflate data without a zlib header.
        self._z = zlib.decompressobj(-zlib.MAX_WBITS)

    def _Inflate(self, b):
        inflater = self._z
        if not inflater:
            # Compressed data already ended: give the chunk back.
            self._fd.unread(b)
            return None

        output = inflater.decompress(b)

        leftover = inflater.unconsumed_tail
        if leftover != '':
            self._fd.unread(leftover)
        else:
            trailing = inflater.unused_data
            if trailing != '':
                # Input beyond the end of the deflate stream: return it
                # and retire the decompressor.
                self._fd.unread(trailing)
                self._z = None
        return output
|