using System;
using System.Text;
using ICSharpCode.SharpZipLib.Core;

namespace ICSharpCode.SharpZipLib.Zip
{
	/// <summary>
	/// This static class contains functions for encoding and decoding zip file strings
	/// </summary>
	public static class ZipStrings
	{
		/// <summary>
		/// Resolves <see cref="SystemDefaultCodePage"/> once, when the type is first used.
		/// </summary>
		/// <remarks>
		/// Get OEM codepage from NetFX, which parses the NLP file with culture info table etc etc.
		/// But sometimes it yields the special value of 1 which is nicknamed <c>CodePageNoOEM</c> in
		/// <see cref="Encoding"/> sources (might also mean <c>CP_OEMCP</c>, but Encoding puts it so).
		/// This was observed on Ukrainian and Hindi systems.
		/// Given this value, <see cref="Encoding.GetEncoding(int)"/> throws an <see cref="ArgumentException"/>.
		/// So replace it with <see cref="FallbackCodePage"/> (IBM 437, which is the default code page in a
		/// default Windows installation console).
		/// </remarks>
		static ZipStrings()
		{
			try
			{
				var platformCodepage = Encoding.GetEncoding(0).CodePage;
				SystemDefaultCodePage = IsReservedCodePage(platformCodepage)
					? FallbackCodePage
					: platformCodepage;
			}
			catch
			{
				// Deliberate best effort: an exception escaping a type initializer would make
				// the whole class unusable, so any lookup failure falls back to IBM 437.
				SystemDefaultCodePage = FallbackCodePage;
			}
		}

		/// <summary>Code page backing field</summary>
		/// <remarks>
		/// The original Zip specification (https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT) states
		/// that file names should only be encoded with IBM Code Page 437 or UTF-8.
		/// In practice, most zip apps use OEM or system encoding (typically cp437 on Windows).
		/// Let's be good citizens and default to UTF-8 http://utf8everywhere.org/
		/// </remarks>
		private static int codePage = AutomaticCodePage;

		/// <summary>
		/// Automatically select codepage while opening archive
		/// see https://github.com/icsharpcode/SharpZipLib/pull/280#issuecomment-433608324
		/// </summary>
		private const int AutomaticCodePage = -1;

		/// <summary>
		/// Code page used when the platform default cannot be determined or is unusable (IBM 437).
		/// </summary>
		private const int FallbackCodePage = 437;

		/// <summary>
		/// Encoding used for string conversion. Setting this to 65001 (UTF-8) will
		/// also set the Language encoding flag to indicate UTF-8 encoded file names.
		/// </summary>
		/// <remarks>
		/// While no code page has been assigned explicitly, the getter reports the UTF-8 code page.
		/// </remarks>
		/// <exception cref="ArgumentOutOfRangeException">
		/// The assigned value is negative, greater than 65535, or one of the identifiers 1, 2, 3 or 42.
		/// </exception>
		public static int CodePage
		{
			get
			{
				return codePage == AutomaticCodePage ? Encoding.UTF8.CodePage : codePage;
			}
			set
			{
				if ((value < 0) || (value > 65535) || IsReservedCodePage(value))
				{
					throw new ArgumentOutOfRangeException(nameof(value));
				}

				codePage = value;
			}
		}

		/// <summary>
		/// Attempt to get the operating system default codepage, or failing that, to
		/// the fallback code page IBM 437.
		/// </summary>
		public static int SystemDefaultCodePage { get; }

		/// <summary>
		/// Get whether the default codepage is set to UTF-8. Setting this property to false will
		/// set the <see cref="CodePage"/> to <see cref="SystemDefaultCodePage"/>
		/// </summary>
		/// <remarks>
		/// The getter compares the raw backing field, so it returns <c>false</c> while no code page
		/// has been assigned explicitly, even though <see cref="CodePage"/> reports UTF-8 in that state.
		/// </remarks>
		public static bool UseUnicode
		{
			get
			{
				return codePage == Encoding.UTF8.CodePage;
			}
			set
			{
				codePage = value ? Encoding.UTF8.CodePage : SystemDefaultCodePage;
			}
		}

		/// <summary>
		/// Tells whether <paramref name="candidate"/> is one of the identifiers (1, 2, 3, 42) that this
		/// class refuses to use as a real code page.
		/// </summary>
		/// <param name="candidate">The code page identifier to test.</param>
		/// <returns>true if the identifier must not be passed to <see cref="Encoding.GetEncoding(int)"/>.</returns>
		private static bool IsReservedCodePage(int candidate)
			=> candidate == 1 || candidate == 2 || candidate == 3 || candidate == 42;

		/// <summary>
		/// Convert a portion of a byte array to a string using <see cref="CodePage"/>
		/// </summary>
		/// <param name="data">
		/// Data to convert to string
		/// </param>
		/// <param name="count">
		/// Number of bytes to convert starting from index 0
		/// </param>
		/// <returns>
		/// data[0]..data[count - 1] converted to a string, or an empty string if
		/// <paramref name="data"/> is null
		/// </returns>
		public static string ConvertToString(byte[] data, int count)
			=> data == null
				? string.Empty
				: Encoding.GetEncoding(CodePage).GetString(data, 0, count);

		/// <summary>
		/// Convert a byte array to a string using <see cref="CodePage"/>
		/// </summary>
		/// <param name="data">
		/// Byte array to convert
		/// </param>
		/// <returns>
		/// <paramref name="data"/> converted to a string, or an empty string if
		/// <paramref name="data"/> is null
		/// </returns>
		public static string ConvertToString(byte[] data)
			// Check for null before reading Length so this overload agrees with the count-taking one.
			=> data == null
				? string.Empty
				: ConvertToString(data, data.Length);

		/// <summary>
		/// Select the encoding matching the general purpose bit flags of an entry.
		/// </summary>
		/// <param name="flags">The applicable general purpose bits flags</param>
		/// <returns>
		/// UTF-8 when the <see cref="GeneralBitFlags.UnicodeText"/> bit is set; otherwise the explicitly
		/// configured code page, or <see cref="SystemDefaultCodePage"/> when none was configured.
		/// </returns>
		private static Encoding EncodingFromFlag(int flags)
			=> ((flags & (int)GeneralBitFlags.UnicodeText) != 0)
				? Encoding.UTF8
				: Encoding.GetEncoding(
					// if CodePage wasn't set manually and no utf flag present
					// then we must use SystemDefault (old behavior)
					// otherwise, CodePage should be preferred over SystemDefault
					// see https://github.com/icsharpcode/SharpZipLib/issues/274
					codePage == AutomaticCodePage
						? SystemDefaultCodePage
						: codePage);

		/// <summary>
		/// Convert a byte array to a string using the encoding selected by <paramref name="flags"/>
		/// </summary>
		/// <param name="flags">The applicable general purpose bits flags</param>
		/// <param name="data">
		/// Byte array to convert
		/// </param>
		/// <param name="count">The number of bytes to convert.</param>
		/// <returns>
		/// <paramref name="data"/> converted to a string, or an empty string if
		/// <paramref name="data"/> is null
		/// </returns>
		public static string ConvertToStringExt(int flags, byte[] data, int count)
			=> (data == null)
				? string.Empty
				: EncodingFromFlag(flags).GetString(data, 0, count);

		/// <summary>
		/// Convert a byte array to a string using the encoding selected by <paramref name="flags"/>
		/// </summary>
		/// <param name="data">
		/// Byte array to convert
		/// </param>
		/// <param name="flags">The applicable general purpose bits flags</param>
		/// <returns>
		/// <paramref name="data"/> converted to a string, or an empty string if
		/// <paramref name="data"/> is null
		/// </returns>
		public static string ConvertToStringExt(int flags, byte[] data)
			// Check for null before reading Length so this overload agrees with the count-taking one.
			=> (data == null)
				? string.Empty
				: ConvertToStringExt(flags, data, data.Length);

		/// <summary>
		/// Convert a string to a byte array using <see cref="CodePage"/>
		/// </summary>
		/// <param name="str">
		/// String to convert to an array
		/// </param>
		/// <returns>Converted array; empty if <paramref name="str"/> is null</returns>
		public static byte[] ConvertToArray(string str)
			=> str == null
				? Empty.Array<byte>()
				: Encoding.GetEncoding(CodePage).GetBytes(str);

		/// <summary>
		/// Convert a string to a byte array using the encoding selected by <paramref name="flags"/>
		/// </summary>
		/// <param name="flags">The applicable general purpose bits flags</param>
		/// <param name="str">
		/// String to convert to an array
		/// </param>
		/// <returns>Converted array; empty if <paramref name="str"/> is null or empty</returns>
		public static byte[] ConvertToArray(int flags, string str)
			=> string.IsNullOrEmpty(str)
				? Empty.Array<byte>()
				: EncodingFromFlag(flags).GetBytes(str);
	}
}