import_zip.py 8.8 KB


#
# Copyright (C) 2008 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
  15. import stat
  16. import struct
  17. import zlib
  18. import cStringIO
  19. from import_ext import ImportExternal
  20. from error import ImportError
  21. class ImportZip(ImportExternal):
  22. """Streams a zip file from the network directly into a Project's
  23. Git repository.
  24. """
  25. @classmethod
  26. def CanAccept(cls, url):
  27. """Can this importer read and unpack the data stored at url?
  28. """
  29. if url.endswith('.zip') or url.endswith('.jar'):
  30. return True
  31. return False
  32. def _UnpackFiles(self):
  33. url_fd, url = self._OpenUrl()
  34. try:
  35. if not self.__class__.CanAccept(url):
  36. raise ImportError('non-zip file extension: %s' % url)
  37. zip = _ZipFile(url_fd)
  38. for entry in zip.FileRecords():
  39. data = zip.Open(entry).read()
  40. sz = len(data)
  41. if data and _SafeCRLF(data):
  42. data = data.replace('\r\n', '\n')
  43. sz = len(data)
  44. fd = cStringIO.StringIO(data)
  45. self._UnpackOneFile(entry.mode, sz, entry.name, fd)
  46. zip.Close(entry)
  47. for entry in zip.CentralDirectory():
  48. self._SetFileMode(entry.name, entry.mode)
  49. zip.CheckTail()
  50. finally:
  51. url_fd.close()
  52. def _SafeCRLF(data):
  53. """Is it reasonably safe to perform a CRLF->LF conversion?
  54. If the stream contains a NUL byte it is likely binary,
  55. and thus a CRLF->LF conversion may damage the stream.
  56. If the only NUL is in the last position of the stream,
  57. but it otherwise can do a CRLF<->LF conversion we do
  58. the CRLF conversion anyway. At least one source ZIP
  59. file has this structure in its source code.
  60. If every occurrance of a CR and LF is paired up as a
  61. CRLF pair then the conversion is safely bi-directional.
  62. s/\r\n/\n/g == s/\n/\r\\n/g can convert between them.
  63. """
  64. nul = data.find('\0')
  65. if 0 <= nul and nul < (len(data) - 1):
  66. return False
  67. n_lf = 0
  68. last = 0
  69. while True:
  70. lf = data.find('\n', last)
  71. if lf < 0:
  72. break
  73. if lf == 0 or data[lf - 1] != '\r':
  74. return False
  75. last = lf + 1
  76. n_lf += 1
  77. return n_lf > 0
  78. class _ZipFile(object):
  79. """Streaming iterator to parse a zip file on the fly.
  80. """
  81. def __init__(self, fd):
  82. self._fd = _UngetStream(fd)
  83. def FileRecords(self):
  84. return _FileIter(self._fd)
  85. def CentralDirectory(self):
  86. return _CentIter(self._fd)
  87. def CheckTail(self):
  88. type_buf = self._fd.read(4)
  89. type = struct.unpack('<I', type_buf)[0]
  90. if type != 0x06054b50: # end of central directory
  91. raise ImportError('zip record %x unsupported' % type)
  92. def Open(self, entry):
  93. if entry.is_compressed:
  94. return _InflateStream(self._fd)
  95. else:
  96. if entry.has_trailer:
  97. raise ImportError('unable to extract streamed zip')
  98. return _FixedLengthStream(self._fd, entry.uncompressed_size)
  99. def Close(self, entry):
  100. if entry.has_trailer:
  101. type = struct.unpack('<I', self._fd.read(4))[0]
  102. if type == 0x08074b50:
  103. # Not a formal type marker, but commonly seen in zips
  104. # as the data descriptor signature.
  105. #
  106. struct.unpack('<3I', self._fd.read(12))
  107. else:
  108. # No signature for the data descriptor, so read the
  109. # remaining fields out of the stream
  110. #
  111. self._fd.read(8)
  112. class _FileIter(object):
  113. def __init__(self, fd):
  114. self._fd = fd
  115. def __iter__(self):
  116. return self
  117. def next(self):
  118. fd = self._fd
  119. type_buf = fd.read(4)
  120. type = struct.unpack('<I', type_buf)[0]
  121. if type != 0x04034b50: # local file header
  122. fd.unread(type_buf)
  123. raise StopIteration()
  124. rec = _FileHeader(fd.read(26))
  125. rec.name = fd.read(rec.name_len)
  126. fd.read(rec.extra_len)
  127. if rec.name.endswith('/'):
  128. rec.name = rec.name[:-1]
  129. rec.mode = stat.S_IFDIR | 0777
  130. return rec
  131. class _FileHeader(object):
  132. """Information about a single file in the archive.
  133. 0 version needed to extract 2 bytes
  134. 1 general purpose bit flag 2 bytes
  135. 2 compression method 2 bytes
  136. 3 last mod file time 2 bytes
  137. 4 last mod file date 2 bytes
  138. 5 crc-32 4 bytes
  139. 6 compressed size 4 bytes
  140. 7 uncompressed size 4 bytes
  141. 8 file name length 2 bytes
  142. 9 extra field length 2 bytes
  143. """
  144. def __init__(self, raw_bin):
  145. rec = struct.unpack('<5H3I2H', raw_bin)
  146. if rec[2] == 8:
  147. self.is_compressed = True
  148. elif rec[2] == 0:
  149. self.is_compressed = False
  150. else:
  151. raise ImportError('unrecognized compression format')
  152. if rec[1] & (1 << 3):
  153. self.has_trailer = True
  154. else:
  155. self.has_trailer = False
  156. self.compressed_size = rec[6]
  157. self.uncompressed_size = rec[7]
  158. self.name_len = rec[8]
  159. self.extra_len = rec[9]
  160. self.mode = stat.S_IFREG | 0644
  161. class _CentIter(object):
  162. def __init__(self, fd):
  163. self._fd = fd
  164. def __iter__(self):
  165. return self
  166. def next(self):
  167. fd = self._fd
  168. type_buf = fd.read(4)
  169. type = struct.unpack('<I', type_buf)[0]
  170. if type != 0x02014b50: # central directory
  171. fd.unread(type_buf)
  172. raise StopIteration()
  173. rec = _CentHeader(fd.read(42))
  174. rec.name = fd.read(rec.name_len)
  175. fd.read(rec.extra_len)
  176. fd.read(rec.comment_len)
  177. if rec.name.endswith('/'):
  178. rec.name = rec.name[:-1]
  179. rec.mode = stat.S_IFDIR | 0777
  180. return rec
  181. class _CentHeader(object):
  182. """Information about a single file in the archive.
  183. 0 version made by 2 bytes
  184. 1 version needed to extract 2 bytes
  185. 2 general purpose bit flag 2 bytes
  186. 3 compression method 2 bytes
  187. 4 last mod file time 2 bytes
  188. 5 last mod file date 2 bytes
  189. 6 crc-32 4 bytes
  190. 7 compressed size 4 bytes
  191. 8 uncompressed size 4 bytes
  192. 9 file name length 2 bytes
  193. 10 extra field length 2 bytes
  194. 11 file comment length 2 bytes
  195. 12 disk number start 2 bytes
  196. 13 internal file attributes 2 bytes
  197. 14 external file attributes 4 bytes
  198. 15 relative offset of local header 4 bytes
  199. """
  200. def __init__(self, raw_bin):
  201. rec = struct.unpack('<6H3I5H2I', raw_bin)
  202. self.name_len = rec[9]
  203. self.extra_len = rec[10]
  204. self.comment_len = rec[11]
  205. if (rec[0] & 0xff00) == 0x0300: # UNIX
  206. self.mode = rec[14] >> 16
  207. else:
  208. self.mode = stat.S_IFREG | 0644
  209. class _UngetStream(object):
  210. """File like object to read and rewind a stream.
  211. """
  212. def __init__(self, fd):
  213. self._fd = fd
  214. self._buf = None
  215. def read(self, size = -1):
  216. r = []
  217. try:
  218. if size >= 0:
  219. self._ReadChunk(r, size)
  220. else:
  221. while True:
  222. self._ReadChunk(r, 2048)
  223. except EOFError:
  224. pass
  225. if len(r) == 1:
  226. return r[0]
  227. return ''.join(r)
  228. def unread(self, buf):
  229. b = self._buf
  230. if b is None or len(b) == 0:
  231. self._buf = buf
  232. else:
  233. self._buf = buf + b
  234. def _ReadChunk(self, r, size):
  235. b = self._buf
  236. try:
  237. while size > 0:
  238. if b is None or len(b) == 0:
  239. b = self._Inflate(self._fd.read(2048))
  240. if not b:
  241. raise EOFError()
  242. continue
  243. use = min(size, len(b))
  244. r.append(b[:use])
  245. b = b[use:]
  246. size -= use
  247. finally:
  248. self._buf = b
  249. def _Inflate(self, b):
  250. return b
  251. class _FixedLengthStream(_UngetStream):
  252. """File like object to read a fixed length stream.
  253. """
  254. def __init__(self, fd, have):
  255. _UngetStream.__init__(self, fd)
  256. self._have = have
  257. def _Inflate(self, b):
  258. n = self._have
  259. if n == 0:
  260. self._fd.unread(b)
  261. return None
  262. if len(b) > n:
  263. self._fd.unread(b[n:])
  264. b = b[:n]
  265. self._have -= len(b)
  266. return b
  267. class _InflateStream(_UngetStream):
  268. """Inflates the stream as it reads input.
  269. """
  270. def __init__(self, fd):
  271. _UngetStream.__init__(self, fd)
  272. self._z = zlib.decompressobj(-zlib.MAX_WBITS)
  273. def _Inflate(self, b):
  274. z = self._z
  275. if not z:
  276. self._fd.unread(b)
  277. return None
  278. b = z.decompress(b)
  279. if z.unconsumed_tail != '':
  280. self._fd.unread(z.unconsumed_tail)
  281. elif z.unused_data != '':
  282. self._fd.unread(z.unused_data)
  283. self._z = None
  284. return b