using ICSharpCode.SharpZipLib.Checksum;
using ICSharpCode.SharpZipLib.Zip.Compression;
using ICSharpCode.SharpZipLib.Zip.Compression.Streams;
using System;
using System.IO;
using System.Text;

namespace ICSharpCode.SharpZipLib.GZip
{
	/// <summary>
	/// This filter stream is used to decompress a "GZIP" format stream.
	/// The "GZIP" format is described in RFC 1952.
	///
	/// author of the original java version : John Leuner
	/// </summary>
	/// <example> This sample shows how to unzip a gzipped file
	/// <code>
	/// using System;
	/// using System.IO;
	///
	/// using ICSharpCode.SharpZipLib.Core;
	/// using ICSharpCode.SharpZipLib.GZip;
	///
	/// class MainClass
	/// {
	/// 	public static void Main(string[] args)
	/// 	{
	/// 		using (Stream inStream = new GZipInputStream(File.OpenRead(args[0])))
	/// 		using (FileStream outStream = File.Create(Path.GetFileNameWithoutExtension(args[0]))) {
	/// 			byte[] buffer = new byte[4096];
	/// 			StreamUtils.Copy(inStream, outStream, buffer);
	/// 		}
	/// 	}
	/// }
	/// </code>
	/// </example>
	public class GZipInputStream : InflaterInputStream
	{
		#region Instance Fields

		/// <summary>
		/// CRC-32 value for uncompressed data
		/// </summary>
		protected Crc32 crc;

		/// <summary>
		/// Flag to indicate if we've read the GZIP header yet for the current member (block of compressed data).
		/// This is tracked per-block as the file is parsed.
		/// </summary>
		private bool readGZIPHeader;

		/// <summary>
		/// Flag to indicate if at least one block in a stream with concatenated blocks was read successfully.
		/// This allows us to exit gracefully if downstream data is not in gzip format.
		/// </summary>
		private bool completedLastBlock;

		/// <summary>
		/// File name taken from the header of the block last read, or null if that header had no name field.
		/// </summary>
		private string fileName;

		#endregion Instance Fields

		#region Constructors

		/// <summary>
		/// Creates a GZipInputStream with the default buffer size (4096 bytes)
		/// </summary>
		/// <param name="baseInputStream">
		/// The stream to read compressed data from (in GZIP format)
		/// </param>
		public GZipInputStream(Stream baseInputStream)
			: this(baseInputStream, 4096)
		{
		}

		/// <summary>
		/// Creates a GZipInputStream with the specified buffer size
		/// </summary>
		/// <param name="baseInputStream">
		/// The stream to read compressed data from (in GZIP format)
		/// </param>
		/// <param name="size">
		/// Size of the buffer to use
		/// </param>
		public GZipInputStream(Stream baseInputStream, int size)
			: base(baseInputStream, new Inflater(true), size)
		{
		}

		#endregion Constructors

		#region Stream overrides

		/// <summary>
		/// Reads uncompressed data into an array of bytes
		/// </summary>
		/// <param name="buffer">
		/// The buffer to read uncompressed data into
		/// </param>
		/// <param name="offset">
		/// The offset indicating where the data should be placed
		/// </param>
		/// <param name="count">
		/// The number of uncompressed bytes to be read
		/// </param>
		/// <returns>
		/// Returns the number of bytes actually read. Zero is returned when no further block header
		/// is available, when <paramref name="count"/> is zero, or when data following a successfully
		/// completed block cannot be parsed as a GZIP header.
		/// </returns>
		public override int Read(byte[] buffer, int offset, int count)
		{
			// A GZIP file can contain multiple blocks of compressed data, although this is quite rare.
			// A compressed block could potentially be empty, so we need to loop until we reach EOF or
			// we find data.
			while (true)
			{
				// If we haven't read the header for this block, read it
				if (!readGZIPHeader)
				{
					// Try to read header. If there is no header (0 bytes available), this is EOF. If there is
					// an incomplete header, this will throw an exception.
					try
					{
						if (!ReadHeader())
						{
							return 0;
						}
					}
					catch (Exception ex) when (completedLastBlock && (ex is GZipException || ex is EndOfStreamException))
					{
						// If we completed the last block (i.e. we're in a stream that has multiple blocks
						// concatenated) we want to return gracefully from any header parsing exceptions,
						// since sometimes there may be trailing garbage on a stream.
						return 0;
					}
				}

				// Try to read compressed data
				int bytesRead = base.Read(buffer, offset, count);
				if (bytesRead > 0)
				{
					// Fold the freshly inflated bytes into the running CRC that ReadFooter verifies.
					crc.Update(new ArraySegment<byte>(buffer, offset, bytesRead));
				}

				// If this is the end of stream, read the footer
				if (inf.IsFinished)
				{
					ReadFooter();
				}

				// Attempting to read 0 bytes will never yield any bytesRead, so we return instead of looping forever
				if (bytesRead > 0 || count == 0)
				{
					return bytesRead;
				}
			}
		}

		/// <summary>
		/// Retrieves the filename header field for the block last read
		/// </summary>
		/// <returns>The file name from the last header read, or null if that header carried no name field.</returns>
		public string GetFilename()
		{
			return fileName;
		}

		#endregion Stream overrides

		#region Support routines

		/// <summary>
		/// Parses the GZIP header of the next block, validating the magic bytes, compression method,
		/// reserved flag bits and (when present) the header CRC16.
		/// </summary>
		/// <returns>
		/// true if a header was read; false if no input was available at all (end of stream).
		/// </returns>
		/// <exception cref="GZipException">The header is malformed or its CRC16 does not match.</exception>
		private bool ReadHeader()
		{
			// Longest file name (in bytes) that is retained; longer names are truncated.
			const int maxFileNameLength = 1024;

			// Initialize CRC for this block
			crc = new Crc32();

			// Make sure there is data in file. We can't rely on ReadLeByte() to fill the buffer, as this could be EOF,
			// which is fine, but ReadLeByte() throws an exception if it doesn't find data, so we do this part ourselves.
			if (inputBuffer.Available <= 0)
			{
				inputBuffer.Fill();
				if (inputBuffer.Available <= 0)
				{
					// No header, EOF.
					return false;
				}
			}

			// CRC over every header byte, needed to verify the optional header CRC16 field.
			var headCRC = new Crc32();

			// 1. Check the two magic bytes
			var magic = inputBuffer.ReadLeByte();
			headCRC.Update(magic);
			if (magic != GZipConstants.ID1)
			{
				throw new GZipException("Error GZIP header, first magic byte doesn't match");
			}

			magic = inputBuffer.ReadLeByte();
			if (magic != GZipConstants.ID2)
			{
				throw new GZipException("Error GZIP header, second magic byte doesn't match");
			}
			headCRC.Update(magic);

			// 2. Check the compression type (must be 8)
			var compressionType = inputBuffer.ReadLeByte();
			if (compressionType != GZipConstants.CompressionMethodDeflate)
			{
				throw new GZipException("Error GZIP header, data not in deflate format");
			}
			headCRC.Update(compressionType);

			// 3. Check the flags
			var flagsByte = inputBuffer.ReadLeByte();
			headCRC.Update(flagsByte);

			// 3.1 Check the reserved bits are zero
			if ((flagsByte & 0xE0) != 0)
			{
				throw new GZipException("Reserved flag bits in GZIP header != 0");
			}

			var flags = (GZipFlags)flagsByte;

			// 4.-6. Skip the modification time, extra flags, and OS type
			for (int i = 0; i < 6; i++)
			{
				headCRC.Update(inputBuffer.ReadLeByte());
			}

			// 7. Read extra field
			if (flags.HasFlag(GZipFlags.FEXTRA))
			{
				// XLEN is total length of extra subfields, we will skip them all
				var len1 = inputBuffer.ReadLeByte();
				var len2 = inputBuffer.ReadLeByte();
				headCRC.Update(len1);
				headCRC.Update(len2);

				int extraLen = (len2 << 8) | len1; // gzip is LSB first
				for (int i = 0; i < extraLen; i++)
				{
					headCRC.Update(inputBuffer.ReadLeByte());
				}
			}

			// 8. Read file name
			if (flags.HasFlag(GZipFlags.FNAME))
			{
				var fname = new byte[maxFileNameLength];
				var fnamePos = 0;
				int readByte;
				while ((readByte = inputBuffer.ReadLeByte()) > 0)
				{
					// Bytes beyond the buffer capacity are dropped from the name but still go into the CRC.
					if (fnamePos < maxFileNameLength)
					{
						fname[fnamePos++] = (byte)readByte;
					}
					headCRC.Update(readByte);
				}

				// The zero terminator is part of the header and therefore part of the CRC.
				headCRC.Update(readByte);
				fileName = GZipConstants.Encoding.GetString(fname, 0, fnamePos);
			}
			else
			{
				fileName = null;
			}

			// 9. Read comment
			if (flags.HasFlag(GZipFlags.FCOMMENT))
			{
				int readByte;
				while ((readByte = inputBuffer.ReadLeByte()) > 0)
				{
					headCRC.Update(readByte);
				}

				// The zero terminator is part of the header and therefore part of the CRC.
				headCRC.Update(readByte);
			}

			// 10. Read header CRC
			if (flags.HasFlag(GZipFlags.FHCRC))
			{
				// RFC 1952 stores every multi-byte value least-significant byte first, so the first
				// byte read is the LOW byte of the CRC16 and the second is the high byte.
				int crcLow = inputBuffer.ReadLeByte();
				if (crcLow < 0)
				{
					throw new EndOfStreamException("EOS reading GZIP header");
				}

				int crcHigh = inputBuffer.ReadLeByte();
				if (crcHigh < 0)
				{
					throw new EndOfStreamException("EOS reading GZIP header");
				}

				int storedCrc = (crcHigh << 8) | crcLow;

				// The CRC16 is the two least significant bytes of the CRC32 of the preceding header bytes.
				if (storedCrc != ((int)headCRC.Value & 0xffff))
				{
					throw new GZipException("Header CRC value mismatch");
				}
			}

			readGZIPHeader = true;
			return true;
		}

		/// <summary>
		/// Reads the 8-byte GZIP footer of the block just inflated and verifies its CRC-32 and
		/// uncompressed length (modulo 2^32), then resets state so a following block can be read.
		/// </summary>
		/// <exception cref="EndOfStreamException">The input ended before the full footer was read.</exception>
		/// <exception cref="GZipException">The CRC or the byte count in the footer does not match.</exception>
		private void ReadFooter()
		{
			byte[] footer = new byte[8];

			// End of stream; reclaim all bytes from inf, read the final byte count, and reset the inflator
			long bytesRead = inf.TotalOut & 0xffffffff;
			inputBuffer.Available += inf.RemainingInput;
			inf.Reset();

			// Read footer from inputBuffer
			int needed = 8;
			while (needed > 0)
			{
				int count = inputBuffer.ReadClearTextBuffer(footer, 8 - needed, needed);
				if (count <= 0)
				{
					throw new EndOfStreamException("EOS reading GZIP footer");
				}
				needed -= count;
			}

			// Calculate CRC (stored least-significant byte first)
			int crcval = (footer[0] & 0xff) | ((footer[1] & 0xff) << 8) | ((footer[2] & 0xff) << 16) | (footer[3] << 24);
			if (crcval != (int)crc.Value)
			{
				throw new GZipException("GZIP crc sum mismatch, theirs \"" + crcval + "\" and ours \"" + (int)crc.Value);
			}

			// NOTE The total here is the original total modulo 2 ^ 32.
			uint total =
				(uint)((uint)footer[4] & 0xff) |
				(uint)(((uint)footer[5] & 0xff) << 8) |
				(uint)(((uint)footer[6] & 0xff) << 16) |
				(uint)((uint)footer[7] << 24);

			if (bytesRead != total)
			{
				throw new GZipException("Number of bytes mismatch in footer");
			}

			// Mark header read as false so if another header exists, we'll continue reading through the file
			readGZIPHeader = false;

			// Indicate that we succeeded on at least one block so we can exit gracefully if there is trailing garbage downstream
			completedLastBlock = true;
		}

		#endregion Support routines
	}
}