diff --git a/docs/README.md b/docs/README.md index 6edd6c9..4e523f9 100644 --- a/docs/README.md +++ b/docs/README.md @@ -2,16 +2,19 @@ ## Release Notes +### 0.2.3 +- [Bug] Fix shared strings memory leak and cross-instance sharing caused by a static field. +- Support mapping cells styled with a date format to the DateTime type. + ### 0.2.2 - SavaAs support xl/sheet dimension - [Breaking Changes] SaveAs value type from object to DataTable & ICollection -- Bug fix: ICollection with type but no data error (https://github.com/shps951023/MiniExcel/issues/105) +- [Bug] Fix ICollection with type but no data error (https://github.com/shps951023/MiniExcel/issues/105) ### 0.2.1 - Optimize type mapping bool and datetime auto check - Query Support xl/worksheets/Sheet Xml Xml `` without `r` attribute or without `` but `` with `r` attribute, but now performance is slow than with dimension ([](https://github.com/shps951023/MiniExcel/issues/2)) - ### 0.2.0 - Release to nuget.org diff --git a/src/MiniExcel/MiniExcel.cs b/src/MiniExcel/MiniExcel.cs index 15ea05b..2aa7bfe 100644 --- a/src/MiniExcel/MiniExcel.cs +++ b/src/MiniExcel/MiniExcel.cs @@ -14,7 +14,7 @@ using System.Globalization; using System.Collections; - public static partial class MiniExcel + public static class MiniExcel { private readonly static UTF8Encoding Utf8WithBom = new System.Text.UTF8Encoding(true); diff --git a/src/MiniExcel/OpenXml/ExcelOpenXmlSheetReader.cs b/src/MiniExcel/OpenXml/ExcelOpenXmlSheetReader.cs index 583a63f..37f94a5 100644 --- a/src/MiniExcel/OpenXml/ExcelOpenXmlSheetReader.cs +++ b/src/MiniExcel/OpenXml/ExcelOpenXmlSheetReader.cs @@ -1,4 +1,5 @@ using MiniExcelLibs.Utils; +using MiniExcelLibs.Zip; using System; using System.Collections.Generic; using System.Collections.ObjectModel; @@ -6,17 +7,21 @@ using System.Globalization; using System.IO; using System.IO.Compression; using System.Linq; -using System.Text; using System.Xml; using System.Xml.Linq; namespace MiniExcelLibs.OpenXml { - internal partial class ExcelOpenXmlSheetReader + internal class
ExcelOpenXmlSheetReader { - internal Dictionary GetSharedStrings(ReadOnlyCollection entries) + private const string ns = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"; + private List _sheetRecords = null; + private Dictionary _SharedStrings; + private ExcelOpenXmlStyles _style; + + internal Dictionary GetSharedStrings(ExcelOpenXmlZip archive) { - var sharedStringsEntry = entries.SingleOrDefault(w => w.FullName == "xl/sharedStrings.xml"); + var sharedStringsEntry = archive.GetEntry("xl/sharedStrings.xml"); if (sharedStringsEntry == null) return null; using (var stream = sharedStringsEntry.Open()) @@ -24,7 +29,7 @@ namespace MiniExcelLibs.OpenXml var xl = XElement.Load(stream); var ts = xl.Descendants(ExcelOpenXmlXName.T).Select((s, i) => new { i, v = s.Value?.ToString() }) .ToDictionary(s => s.i, s => s.v) - ; + ;//TODO:need recode return ts; } } @@ -34,7 +39,7 @@ namespace MiniExcelLibs.OpenXml using (var stream = entries.Single(w => w.FullName == "xl/workbook.xml").Open()) using (XmlReader reader = XmlReader.Create(stream, XmlSettings)) { - if (!reader.IsStartElement("workbook", "http://schemas.openxmlformats.org/spreadsheetml/2006/main")) + if (!reader.IsStartElement("workbook", ns)) { yield break; } @@ -46,7 +51,7 @@ namespace MiniExcelLibs.OpenXml while (!reader.EOF) { - if (reader.IsStartElement("sheets", "http://schemas.openxmlformats.org/spreadsheetml/2006/main")) + if (reader.IsStartElement("sheets", ns)) { if (!XmlReaderHelper.ReadFirstContent(reader)) { @@ -55,7 +60,7 @@ namespace MiniExcelLibs.OpenXml while (!reader.EOF) { - if (reader.IsStartElement("sheet", "http://schemas.openxmlformats.org/spreadsheetml/2006/main")) + if (reader.IsStartElement("sheet", ns)) { yield return new SheetRecord( reader.GetAttribute("name"), @@ -77,15 +82,13 @@ namespace MiniExcelLibs.OpenXml } } } - - - private const string NsSpreadsheetMl = @"http://schemas.openxmlformats.org/spreadsheetml/2006/main"; + internal IEnumerable ReadStyle(ReadOnlyCollection 
entries) { using (var stream = entries.Single(w => w.FullName == "xl/styles.xml").Open()) using (XmlReader reader = XmlReader.Create(stream, XmlSettings)) { - if (!reader.IsStartElement("styleSheet", NsSpreadsheetMl)) + if (!reader.IsStartElement("styleSheet", ns)) { yield break; } @@ -97,7 +100,7 @@ namespace MiniExcelLibs.OpenXml while (!reader.EOF) { - if (reader.IsStartElement("cellXfs", NsSpreadsheetMl)) + if (reader.IsStartElement("cellXfs", ns)) { if (!XmlReaderHelper.ReadFirstContent(reader)) { @@ -105,7 +108,7 @@ namespace MiniExcelLibs.OpenXml } while (!reader.EOF) { - if (reader.IsStartElement("xf", NsSpreadsheetMl)) + if (reader.IsStartElement("xf", ns)) { int.TryParse(reader.GetAttribute("xfId"), out var xfId); int.TryParse(reader.GetAttribute("numFmtId"), out var numFmtId); @@ -131,7 +134,7 @@ namespace MiniExcelLibs.OpenXml } } - private List _sheetRecords = null; + internal void ReadWorkbookRels(ReadOnlyCollection entries) { _sheetRecords = ReadWorkbook(entries).ToList(); @@ -174,15 +177,12 @@ namespace MiniExcelLibs.OpenXml } } - private static Dictionary _SharedStrings; - - private const string ns = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"; - internal IEnumerable> QueryImpl(Stream stream, bool UseHeaderRow = false) { - using (ZipArchive archive = new ZipArchive(stream, ZipArchiveMode.Read, false, UTF8Encoding.UTF8)) + using (var archive = new ExcelOpenXmlZip(stream)) { - _SharedStrings = GetSharedStrings(archive.Entries); + _SharedStrings = GetSharedStrings(archive); + // if sheets count > 1 need to read xl/_rels/workbook.xml.rels and var sheets = archive.Entries.Where(w => w.FullName.StartsWith("xl/worksheets/sheet", StringComparison.OrdinalIgnoreCase) @@ -356,19 +356,48 @@ namespace MiniExcelLibs.OpenXml { if (reader.IsStartElement("c", ns)) { + var aS = reader.GetAttribute("s"); var cellValue = ReadCell(reader, columnIndex, withoutCR, out var _columnIndex); columnIndex = _columnIndex; - //if not using First Head then using 
1,2,3 as index - if (UseHeaderRow) + // xfindex + if (!string.IsNullOrEmpty(aS)) { - if (rowIndex == 0) - headRows.Add(columnIndex, cellValue.ToString()); + int xfIndex = -1; + if(int.TryParse(aS, NumberStyles.Any, CultureInfo.InvariantCulture, out var styleIndex)) + { + xfIndex = styleIndex; + } + // only when have s attribute then load styles xml data + if (_style == null) + _style = new ExcelOpenXmlStyles(archive); + //if not using First Head then using 1,2,3 as index + if (UseHeaderRow) + { + if (rowIndex == 0) + headRows.Add(columnIndex, _style.ConvertValueByStyleFormat(xfIndex, cellValue).ToString()); + else + { + var v = _style.ConvertValueByStyleFormat(xfIndex, cellValue); // reuse the index parsed above; int.Parse(aS) could throw on values TryParse tolerates + cell[headRows[columnIndex]] = v; + } + } else - cell[headRows[columnIndex]] = cellValue; + cell[Helpers.GetAlphabetColumnName(columnIndex)] = _style.ConvertValueByStyleFormat(xfIndex, cellValue); } else - cell[Helpers.GetAlphabetColumnName(columnIndex)] = cellValue; + { + //if not using First Head then using 1,2,3 as index + if (UseHeaderRow) + { + if (rowIndex == 0) + headRows.Add(columnIndex, cellValue.ToString()); + else + cell[headRows[columnIndex]] = cellValue; + } + else + cell[Helpers.GetAlphabetColumnName(columnIndex)] = cellValue; + } } else if (!XmlReaderHelper.SkipContent(reader)) break; @@ -399,7 +428,6 @@ namespace MiniExcelLibs.OpenXml private object ReadCell(XmlReader reader, int nextColumnIndex,bool withoutCR, out int columnIndex) { int xfIndex = -1; - var aS = reader.GetAttribute("s"); var aT = reader.GetAttribute("t"); var aR = reader.GetAttribute("r"); @@ -414,23 +442,16 @@ namespace MiniExcelLibs.OpenXml if (!XmlReaderHelper.ReadFirstContent(reader)) return null; - if (aS != null) - { - if (int.TryParse(aS, NumberStyles.Any, CultureInfo.InvariantCulture, out var styleIndex)) - xfIndex = styleIndex; - } - - object value = null; while (!reader.EOF) { - if (reader.IsStartElement("v",
"http://schemas.openxmlformats.org/spreadsheetml/2006/main")) + if (reader.IsStartElement("v", ns)) { string rawValue = reader.ReadElementContentAsString(); if (!string.IsNullOrEmpty(rawValue)) ConvertCellValue(rawValue, aT, xfIndex, out value); } - else if (reader.IsStartElement("is", "http://schemas.openxmlformats.org/spreadsheetml/2006/main")) + else if (reader.IsStartElement("is", ns)) { string rawValue = StringHelper.ReadStringItem(reader); if (!string.IsNullOrEmpty(rawValue)) @@ -513,5 +534,4 @@ namespace MiniExcelLibs.OpenXml XmlResolver = null, }; } - } diff --git a/src/MiniExcel/OpenXml/ExcelOpenXmlStyles.cs b/src/MiniExcel/OpenXml/ExcelOpenXmlStyles.cs new file mode 100644 index 0000000..937d249 --- /dev/null +++ b/src/MiniExcel/OpenXml/ExcelOpenXmlStyles.cs @@ -0,0 +1,167 @@ +namespace MiniExcelLibs.OpenXml +{ + using MiniExcelLibs.Utils; + using MiniExcelLibs.Zip; + using System; + using System.Collections.Generic; + using System.Xml; + internal class ExcelOpenXmlStyles + { + const string NsSpreadsheetMl = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"; + + private Dictionary _cellXfs = new Dictionary(); + private Dictionary _cellStyleXfs = new Dictionary(); + + private static readonly XmlReaderSettings XmlSettings = new XmlReaderSettings + { + IgnoreComments = true, + IgnoreWhitespace = true, + XmlResolver = null, + }; + + public ExcelOpenXmlStyles(ExcelOpenXmlZip zip) + { + using (var Reader = zip.GetXmlReader(@"xl/styles.xml")) + { + if (Reader == null || !Reader.IsStartElement("styleSheet", NsSpreadsheetMl)) // GetXmlReader returns null when the archive has no xl/styles.xml + return; + if (!XmlReaderHelper.ReadFirstContent(Reader)) + return; + while (!Reader.EOF) + { + if (Reader.IsStartElement("cellXfs", NsSpreadsheetMl)) + { + if (!XmlReaderHelper.ReadFirstContent(Reader)) + return; + + var index = 0; + while (!Reader.EOF) + { + if (Reader.IsStartElement("xf", NsSpreadsheetMl)) + { + int.TryParse(Reader.GetAttribute("xfId"), out var xfId); + int.TryParse(Reader.GetAttribute("numFmtId"), out var numFmtId); +
_cellXfs.Add(index, new StyleRecord() { XfId = xfId, NumFmtId = numFmtId }); + Reader.Skip(); + index++; + } + else if (!XmlReaderHelper.SkipContent(Reader)) + break; + } + } + else if (Reader.IsStartElement("cellStyleXfs", NsSpreadsheetMl)) + { + if (!XmlReaderHelper.ReadFirstContent(Reader)) + return; + + var index = 0; + while (!Reader.EOF) + { + if (Reader.IsStartElement("xf", NsSpreadsheetMl)) + { + int.TryParse(Reader.GetAttribute("xfId"), out var xfId); + int.TryParse(Reader.GetAttribute("numFmtId"), out var numFmtId); + + _cellStyleXfs.Add(index, new StyleRecord() { XfId = xfId, NumFmtId = numFmtId }); + Reader.Skip(); + index++; + } + else if (!XmlReaderHelper.SkipContent(Reader)) + break; + } + } + else if (!XmlReaderHelper.SkipContent(Reader)) + { + break; + } + } + } + } + + public NumberFormatString GetStyleFormat(int index) + { + if (_cellXfs.TryGetValue(index, out var styleRecord)) + { + if (Formats.TryGetValue(styleRecord.NumFmtId, out var numberFormat)) + { + return numberFormat; + } + return null; + } + return null; + } + + public object ConvertValueByStyleFormat(int index, object value) + { + var sf = this.GetStyleFormat(index); + if (sf == null) + return value; + if (sf?.Type == typeof(DateTime?)) + { + if (double.TryParse(Convert.ToString(value, System.Globalization.CultureInfo.InvariantCulture), System.Globalization.NumberStyles.Float, System.Globalization.CultureInfo.InvariantCulture, out var s)) // format and parse with the invariant culture so the result does not depend on the thread culture + { + return DateTimeHelper.FromOADate(s); + } + } + return value; + } + + private static Dictionary Formats { get; } = new Dictionary() + { + { 0, new NumberFormatString("General",typeof(string)) }, + { 1, new NumberFormatString("0",typeof(decimal?)) }, + { 2, new NumberFormatString("0.00",typeof(decimal?)) }, + { 3, new NumberFormatString("#,##0",typeof(decimal?)) }, + { 4, new NumberFormatString("#,##0.00",typeof(decimal?)) }, + { 5, new NumberFormatString("\"$\"#,##0_);(\"$\"#,##0)",typeof(decimal?)) }, + { 6, new NumberFormatString("\"$\"#,##0_);[Red](\"$\"#,##0)",typeof(decimal?)) }, + { 7, new NumberFormatString("\"$\"#,##0.00_);(\"$\"#,##0.00)",typeof(decimal?)) }, + { 8, new
NumberFormatString("\"$\"#,##0.00_);[Red](\"$\"#,##0.00)",typeof(string)) }, + { 9, new NumberFormatString("0%",typeof(decimal?)) }, + { 10, new NumberFormatString("0.00%",typeof(string)) }, + { 11, new NumberFormatString("0.00E+00",typeof(string)) }, + { 12, new NumberFormatString("# ?/?",typeof(string)) }, + { 13, new NumberFormatString("# ??/??",typeof(string)) }, + { 14, new NumberFormatString("d/m/yyyy",typeof(DateTime?)) }, + { 15, new NumberFormatString("d-mmm-yy",typeof(DateTime?)) }, + { 16, new NumberFormatString("d-mmm",typeof(DateTime?)) }, + { 17, new NumberFormatString("mmm-yy",typeof(DateTime?)) }, // month-year is a date format, not a time span + { 18, new NumberFormatString("h:mm AM/PM",typeof(TimeSpan)) }, + { 19, new NumberFormatString("h:mm:ss AM/PM",typeof(TimeSpan)) }, + { 20, new NumberFormatString("h:mm",typeof(TimeSpan)) }, + { 21, new NumberFormatString("h:mm:ss",typeof(TimeSpan)) }, + { 22, new NumberFormatString("m/d/yy h:mm",typeof(DateTime?)) }, + // 23..36 international/unused + { 37, new NumberFormatString("#,##0_);(#,##0)",typeof(string)) }, + { 38, new NumberFormatString("#,##0_);[Red](#,##0)",typeof(string)) }, + { 39, new NumberFormatString("#,##0.00_);(#,##0.00)",typeof(string)) }, + { 40, new NumberFormatString("#,##0.00_);[Red](#,##0.00)",typeof(string)) }, + { 41, new NumberFormatString("_(\"$\"* #,##0_);_(\"$\"* (#,##0);_(\"$\"* \"-\"_);_(@_)",typeof(string)) }, + { 42, new NumberFormatString("_(* #,##0_);_(* (#,##0);_(* \"-\"_);_(@_)",typeof(string)) }, + { 43, new NumberFormatString("_(\"$\"* #,##0.00_);_(\"$\"* (#,##0.00);_(\"$\"* \"-\"??_);_(@_)",typeof(string)) }, + { 44, new NumberFormatString("_(* #,##0.00_);_(* (#,##0.00);_(* \"-\"??_);_(@_)",typeof(string)) }, + { 45, new NumberFormatString("mm:ss",typeof(TimeSpan)) }, + { 46, new NumberFormatString("[h]:mm:ss",typeof(TimeSpan)) }, + { 47, new NumberFormatString("mm:ss.0",typeof(TimeSpan)) }, + { 48, new NumberFormatString("##0.0E+0",typeof(string)) }, + { 49, new NumberFormatString("@",typeof(string)) }, + }; + }
+ + internal class NumberFormatString + { + public string FormatString { get; } + public Type Type { get; set; } + public NumberFormatString(string formatString, Type type) + { + FormatString = formatString; + Type = type; + } + } + + internal class StyleRecord + { + public int XfId { get; set; } + public int NumFmtId { get; set; } + } +} \ No newline at end of file diff --git a/src/MiniExcel/Zip/ExcelOpenXmlZip.cs b/src/MiniExcel/Zip/ExcelOpenXmlZip.cs new file mode 100644 index 0000000..a7e26de --- /dev/null +++ b/src/MiniExcel/Zip/ExcelOpenXmlZip.cs @@ -0,0 +1,87 @@ +using System; +using System.Collections.Generic; +using System.Collections.ObjectModel; +using System.IO; +using System.IO.Compression; +using System.Xml; + +namespace MiniExcelLibs.Zip +{ + /// Copy & modified by ExcelDataReader ZipWorker + internal class ExcelOpenXmlZip : IDisposable + { + private readonly Dictionary _entries; + private bool _disposed; + private Stream _zipStream; + private ZipArchive _zipFile; + public ReadOnlyCollection Entries; + private static readonly XmlReaderSettings XmlSettings = new XmlReaderSettings + { + IgnoreComments = true, + IgnoreWhitespace = true, + XmlResolver = null, + }; + public ExcelOpenXmlZip(Stream fileStream) + { + _zipStream = fileStream ?? 
throw new ArgumentNullException(nameof(fileStream)); + _zipFile = new ZipArchive(fileStream); + _entries = new Dictionary(StringComparer.OrdinalIgnoreCase); + Entries = _zipFile.Entries; //TODO:need to remove + foreach (var entry in _zipFile.Entries) + { + _entries.Add(entry.FullName.Replace('\\', '/'), entry); + } + } + + public ZipArchiveEntry GetEntry(string path) + { + if (_entries.TryGetValue(path, out var entry)) + return entry; + return null; + } + + public XmlReader GetXmlReader(string path) + { + var entry = GetEntry(path); + if (entry != null) + return XmlReader.Create(entry.Open(), XmlSettings); + return null; + } + + ~ExcelOpenXmlZip() + { + Dispose(false); + } + + public void Dispose() + { + Dispose(true); + + GC.SuppressFinalize(this); + } + + private void Dispose(bool disposing) + { + // Check to see if Dispose has already been called. + if (!_disposed) + { + if (disposing) + { + if (_zipFile != null) + { + _zipFile.Dispose(); + _zipFile = null; + } + + if (_zipStream != null) + { + _zipStream.Dispose(); + _zipStream = null; + } + } + + _disposed = true; + } + } + } +} diff --git a/src/MiniExcel/Zip/ZipPackageInfo.cs b/src/MiniExcel/Zip/ZipPackageInfo.cs index 5622b37..92ba8ee 100644 --- a/src/MiniExcel/Zip/ZipPackageInfo.cs +++ b/src/MiniExcel/Zip/ZipPackageInfo.cs @@ -1,4 +1,6 @@ -namespace MiniExcelLibs.Zip +using System; + +namespace MiniExcelLibs.Zip { internal class ZipPackageInfo { diff --git a/tests/MiniExcelTests/MiniExcelHelperTests.cs b/tests/MiniExcelTests/MiniExcelHelperTests.cs index a063305..a19f85d 100644 --- a/tests/MiniExcelTests/MiniExcelHelperTests.cs +++ b/tests/MiniExcelTests/MiniExcelHelperTests.cs @@ -181,6 +181,17 @@ namespace MiniExcelLibs.Tests } } + [Fact()] + public void TestDatetimeSpanFormat_ClosedXml() + { + var path = @"..\..\..\..\..\samples\xlsx\TestDatetimeSpanFormat_ClosedXml.xlsx"; + using (var stream = FileHelper.OpenRead(path)) + { + var a = stream.QueryFirst().A; + 
Assert.Equal(DateTime.Parse("2021-03-19T21:01:17.4950000"), (DateTime)a); + } + } + [Fact()] public void LargeFileQueryStrongTypeMapping_Test() { @@ -230,6 +241,16 @@ namespace MiniExcelLibs.Tests } } + [Fact()] + public void QueryCustomStyle() + { + var path = @"..\..\..\..\..\samples\xlsx\TestWihoutRAttribute.xlsx"; + using (var stream = File.OpenRead(path)) + { + + } + } + [Fact()] public void QuerySheetWithoutRAttribute() {