From 63fb261b02d94cdc2155a2927b8bbf74c654dda2 Mon Sep 17 00:00:00 2001 From: wei Date: Sat, 13 Mar 2021 00:51:29 +0800 Subject: [PATCH] Support Query Dynamic and IEnumerable lazy loading --- src/MiniExcel/MiniExcel.cs | 110 +------- src/MiniExcel/OpenXml/ExcelOpenXmlReader.cs | 125 --------- .../OpenXml/ExcelOpenXmlSheetReader.cs | 246 ++++++++++++++++++ src/MiniExcel/OpenXml/Worksheet.cs | 10 - src/MiniExcel/Utils/DateTimeHelper.cs | 81 ++++++ src/MiniExcel/Utils/Helpers.cs | 73 ++++++ src/MiniExcel/Utils/ReferenceHelper.cs | 56 ++++ src/MiniExcel/Utils/StringHelper.cs | 59 +++++ src/MiniExcel/Utils/XmlReaderHelper.cs | 33 +++ src/MiniExcelTests/MiniExcelHelperTests.cs | 77 +++--- src/MiniExcelTests/MiniExcelTests.csproj | 2 + 11 files changed, 586 insertions(+), 286 deletions(-) delete mode 100644 src/MiniExcel/OpenXml/ExcelOpenXmlReader.cs create mode 100644 src/MiniExcel/OpenXml/ExcelOpenXmlSheetReader.cs delete mode 100644 src/MiniExcel/OpenXml/Worksheet.cs create mode 100644 src/MiniExcel/Utils/DateTimeHelper.cs create mode 100644 src/MiniExcel/Utils/Helpers.cs create mode 100644 src/MiniExcel/Utils/ReferenceHelper.cs create mode 100644 src/MiniExcel/Utils/StringHelper.cs create mode 100644 src/MiniExcel/Utils/XmlReaderHelper.cs diff --git a/src/MiniExcel/MiniExcel.cs b/src/MiniExcel/MiniExcel.cs index a8887e1..8f78404 100644 --- a/src/MiniExcel/MiniExcel.cs +++ b/src/MiniExcel/MiniExcel.cs @@ -3,14 +3,11 @@ using MiniExcelLibs.OpenXml; using MiniExcelLibs.Zip; using System; - using System.Collections; using System.Collections.Generic; using System.Data; using System.IO; using System.IO.Compression; - using System.Linq; using System.Text; - using System.Xml.Linq; public static partial class MiniExcel { @@ -163,112 +160,9 @@ CreateXlsxFile(filePath, defaultFiles); } - public static Dictionary> Query(string path) + public static IEnumerable Query(this Stream stream, bool UseHeaderRow = false) { - using (var stream = File.OpenRead(path)) - { - return stream.Query(); - } - } - - public static Dictionary> Query(this Stream stream) - { - using (var reader = new ExcelOpenXmlReader(stream)) - { - var d = new Dictionary>(); - while (reader.Read()) - { - var dic = new Dictionary(); - for (int i = 0; i < reader.FieldCount; i++) - { - var v = reader.GetValue(i); - dic.Add(i, v); - } - d.Add(reader.CurrentRowIndex, dic); - } - return d; - } - } - - internal static Worksheet GetFirstSheet(Stream stream) - { - using (ZipArchive archive = new ZipArchive(stream, ZipArchiveMode.Read, false, UTF8Encoding.UTF8)) - { - var rows = new Dictionary>(); - - //sharedStrings must in memory cache - Dictionary GetSharedStrings() - { - var sharedStringsEntry = archive.Entries.SingleOrDefault(w => w.FullName == "xl/sharedStrings.xml"); - var xml = ConvertToString(sharedStringsEntry); - var xl = XElement.Parse(xml); - var ts = xl.Descendants(ExcelOpenXmlXName.T).Select((s, i) => new { i, v = s.Value?.ToString() }) - .ToDictionary(s => s.i, s => s.v) - ; - return ts; - } - - var sharedStrings = GetSharedStrings(); - - var rowIndexMaximum = int.MinValue; - var columnIndexMaximum = int.MinValue; - - //notice: for performance just read first one and no care the order - var firstSheetEntry = archive.Entries.First(w => w.FullName.StartsWith("xl/worksheets/", StringComparison.OrdinalIgnoreCase)); - { - var xml = ConvertToString(firstSheetEntry); - var xl = XElement.Parse(xml); - - foreach (var row in xl.Descendants(ExcelOpenXmlXName.Row)) - { - // - var datarow = new Dictionary(); - { - var r = row.Attribute("r")?.Value?.ToString(); - - var rowIndex = int.MinValue; - if (int.TryParse(r, out var _rowIndex)) - rowIndex = _rowIndex - 1; // The row attribute is 1 - based - rowIndexMaximum = Math.Max(rowIndexMaximum, rowIndex); - - rows.Add(rowIndex, datarow); - } - - foreach (var cell in row.Descendants(ExcelOpenXmlXName.C)) - { - var t = cell.Attribute("t")?.Value?.ToString(); - var v = cell.Descendants(ExcelOpenXmlXName.V).SingleOrDefault()?.Value; - if (t == "s") - { - if (!string.IsNullOrEmpty(v)) - v = sharedStrings[int.Parse(v)]; - } - - var r = cell.Attribute("r")?.Value?.ToString(); - { - var cellIndex = ExcelOpenXmlUtils.GetCellColumnIndex(r) - 1; - columnIndexMaximum = Math.Max(columnIndexMaximum, cellIndex); - - datarow.Add(cellIndex, v); - } - } - - } - } - - return new Worksheet { - Rows = rows, - FieldCount = columnIndexMaximum + 1, - RowCount = rowIndexMaximum + 1 - }; - } - } - - private static string ConvertToString(ZipArchiveEntry entry) - { - using (var eStream = entry.Open()) - using (var reader = new StreamReader(eStream)) - return reader.ReadToEnd(); + return new ExcelOpenXmlSheetReader().QueryImpl(stream, UseHeaderRow); } private readonly static UTF8Encoding Utf8WithBom = new System.Text.UTF8Encoding(true); diff --git a/src/MiniExcel/OpenXml/ExcelOpenXmlReader.cs b/src/MiniExcel/OpenXml/ExcelOpenXmlReader.cs deleted file mode 100644 index 455fb63..0000000 --- a/src/MiniExcel/OpenXml/ExcelOpenXmlReader.cs +++ /dev/null @@ -1,125 +0,0 @@ -namespace MiniExcelLibs.OpenXml -{ - using System; - using System.Collections.Generic; - using System.Data; - using System.IO; - using System.Linq; - - internal class ExcelOpenXmlReader : IDataReader - { - private static Worksheet _Sheet ; - private Dictionary> _Rows { get { return _Sheet.Rows; } } - - public ExcelOpenXmlReader(Stream stream) - { - _Sheet = MiniExcel.GetFirstSheet(stream); - } - - public int RowCount { get { return _Sheet.RowCount; } } - public int FieldCount { get { return _Sheet.FieldCount; } } - public int Depth { get; private set; } - public int CurrentRowIndex { get { return Depth - 1; } } - - public object this[int i] => GetValue(i); - public object this[string name] => GetValue(GetOrdinal(name)); - - public bool Read() - { - if (Depth == RowCount) - return false; - Depth++; - return true; - } - - public string GetName(int i) => ExcelOpenXmlUtils.ConvertColumnName(i + 1); - - - public int GetOrdinal(string name) => ExcelOpenXmlUtils.GetCellColumnIndex(name); - - public object GetValue(int i) - { - //if (CurrentRowIndex < 0) - // throw new InvalidOperationException("Invalid attempt to read when no data is present."); - if (!_Rows.Keys.Contains(CurrentRowIndex)) - return null; - if (_Rows[this.CurrentRowIndex].TryGetValue(i, out var v)) - return v; - return null; - } - - public int GetValues(object[] values) - { - return this.Depth; - } - - //TODO: multiple sheets - public bool NextResult() => false; - - public void Dispose() { } - - public void Close() { } - - public int RecordsAffected => throw new NotImplementedException(); - - bool IDataReader.IsClosed => this.RowCount - 1 == this.Depth; - - public string GetString(int i) => (string)GetValue(i); - - public bool GetBoolean(int i) => (bool)GetValue(i); - - public byte GetByte(int i) => (byte)GetValue(i); - - public long GetBytes(int i, long fieldOffset, byte[] buffer, int bufferoffset, int length) => throw new NotImplementedException(); - - public char GetChar(int i) => (char)GetValue(i); - - public long GetChars(int i, long fieldoffset, char[] buffer, int bufferoffset, int length) => throw new NotImplementedException(); - - public IDataReader GetData(int i) => throw new NotImplementedException(); - - public string GetDataTypeName(int i) => throw new NotImplementedException(); - - public DateTime GetDateTime(int i) => (DateTime)GetValue(i); - - public decimal GetDecimal(int i) => (decimal)GetValue(i); - - public double GetDouble(int i) => (double)GetValue(i); - - public Type GetFieldType(int i) - { - var v = GetValue(i); - return v == null ? typeof(string) : v.GetType(); - } - - public float GetFloat(int i) => (float)GetValue(i); - - public Guid GetGuid(int i) => (Guid)GetValue(i); - - public short GetInt16(int i) => (short)GetValue(i); - - public int GetInt32(int i) => (int)GetValue(i); - - public long GetInt64(int i) => (long)GetValue(i); - - public DataTable GetSchemaTable() - { - var dataTable = new DataTable("SchemaTable"); - dataTable.Locale = System.Globalization.CultureInfo.InvariantCulture; - dataTable.Columns.Add("ColumnName", typeof(string)); - dataTable.Columns.Add("ColumnOrdinal", typeof(int)); - for (int i = 0; i < this.FieldCount; i++) - { - dataTable.Rows.Add(this.GetName(i), i); - } - DataColumnCollection columns = dataTable.Columns; - foreach (DataColumn item in columns) - { - item.ReadOnly = true; - } - return dataTable; - } - - public bool IsDBNull(int i) => GetValue(i) == null; - } -} diff --git a/src/MiniExcel/OpenXml/ExcelOpenXmlSheetReader.cs b/src/MiniExcel/OpenXml/ExcelOpenXmlSheetReader.cs new file mode 100644 index 0000000..66b901f --- /dev/null +++ b/src/MiniExcel/OpenXml/ExcelOpenXmlSheetReader.cs @@ -0,0 +1,246 @@ +using MiniExcelLibs.Utils; +using System; +using System.Collections.Generic; +using System.Globalization; +using System.IO; +using System.IO.Compression; +using System.Linq; +using System.Text; +using System.Xml; +using System.Xml.Linq; + +namespace MiniExcelLibs.OpenXml +{ + internal class ExcelOpenXmlSheetReader + { + internal Dictionary GetSharedStrings(ZipArchiveEntry sharedStringsEntry) + { + var xl = XElement.Load(sharedStringsEntry.Open()); + var ts = xl.Descendants(ExcelOpenXmlXName.T).Select((s, i) => new { i, v = s.Value?.ToString() }) + .ToDictionary(s => s.i, s => s.v) + ; + return ts; + } + + private static Dictionary _SharedStrings; + + internal IEnumerable QueryImpl(Stream stream, bool UseHeaderRow = false) + { + using (ZipArchive archive = new ZipArchive(stream, ZipArchiveMode.Read, false, UTF8Encoding.UTF8)) + { + var e = archive.Entries.SingleOrDefault(w => w.FullName == "xl/sharedStrings.xml"); + _SharedStrings = GetSharedStrings(e); + + var firstSheetEntry = archive.Entries.First(w => w.FullName.StartsWith("xl/worksheets/", StringComparison.OrdinalIgnoreCase)); + using (var firstSheetEntryStream = firstSheetEntry.Open()) + { + using (XmlReader reader = XmlReader.Create(firstSheetEntryStream, XmlSettings)) + { + var ns = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"; + if (!reader.IsStartElement("worksheet", ns)) + yield break; + + + if (!XmlReaderHelper.ReadFirstContent(reader)) + yield break; + + var maxRowIndex = -1; + var maxColumnIndex = -1; + while (!reader.EOF) + { + //TODO: will dimension after sheetData? + //this method logic depends on dimension to get maxcolumnIndex, if without dimension then it need to foreach all rows first time to get maxColumn and maxRowColumn + if (reader.IsStartElement("dimension", ns)) + { + var @ref = reader.GetAttribute("ref"); + if (string.IsNullOrEmpty(@ref)) + throw new InvalidOperationException("Without sheet dimension data"); + var rs = @ref.Split(':'); + if (ReferenceHelper.ParseReference(rs[1], out int cIndex, out int rIndex)) + { + maxColumnIndex = cIndex - 1; + maxRowIndex = rIndex - 1; + } + else + throw new InvalidOperationException("Invaild sheet dimension start data"); + } + if (reader.IsStartElement("sheetData", ns)) + { + if (!XmlReaderHelper.ReadFirstContent(reader)) + { + continue; + } + + Dictionary headRows = new Dictionary(); + int rowIndex = -1; + int nextRowIndex = 0; + while (!reader.EOF) + { + if (reader.IsStartElement("row", ns)) + { + nextRowIndex = rowIndex + 1; + if (int.TryParse(reader.GetAttribute("r"), out int arValue)) + rowIndex = arValue - 1; // The row attribute is 1-based + else + rowIndex++; + if (!XmlReaderHelper.ReadFirstContent(reader)) + continue; + + // fill empty rows + { + if (nextRowIndex < rowIndex) + { + for (int i = nextRowIndex; i < rowIndex; i++) + if (UseHeaderRow) + yield return Helpers.GetEmptyExpandoObject(headRows); + else + yield return Helpers.GetEmptyExpandoObject(maxColumnIndex); + } + } + + // Set Cells + { + var cell = UseHeaderRow ? Helpers.GetEmptyExpandoObject(headRows) : Helpers.GetEmptyExpandoObject(maxColumnIndex); + var columnIndex = 0; + while (!reader.EOF) + { + if (reader.IsStartElement("c", ns)) + { + var cellValue = ReadCell(reader, columnIndex, out var _columnIndex); + columnIndex = _columnIndex; + + //if not using First Head then using 1,2,3 as index + if (UseHeaderRow) + { + if (rowIndex == 0) + headRows.Add(columnIndex, cellValue.ToString()); + else + cell[headRows[columnIndex]] = cellValue; + } + else + cell[columnIndex.ToString()] = cellValue; + } + else if (!XmlReaderHelper.SkipContent(reader)) + break; + } + + if (UseHeaderRow && rowIndex == 0) + continue; + + yield return cell; + } + } + else if (!XmlReaderHelper.SkipContent(reader)) + { + break; + } + } + + } + else if (!XmlReaderHelper.SkipContent(reader)) + { + break; + } + } + } + } + } + } + + private object ReadCell(XmlReader reader, int nextColumnIndex, out int columnIndex) + { + var aT = reader.GetAttribute("t"); + var aR = reader.GetAttribute("r"); + + //TODO:need to check only need nextColumnIndex or columnIndex + if (ReferenceHelper.ParseReference(aR, out int referenceColumn, out _)) + columnIndex = referenceColumn - 1; // ParseReference is 1-based + else + columnIndex = nextColumnIndex; + + if (!XmlReaderHelper.ReadFirstContent(reader)) + return null; + + + object value = null; + while (!reader.EOF) + { + if (reader.IsStartElement("v", "http://schemas.openxmlformats.org/spreadsheetml/2006/main")) + { + string rawValue = reader.ReadElementContentAsString(); + if (!string.IsNullOrEmpty(rawValue)) + ConvertCellValue(rawValue, aT, out value); + } + else if (reader.IsStartElement("is", "http://schemas.openxmlformats.org/spreadsheetml/2006/main")) + { + string rawValue = StringHelper.ReadStringItem(reader); + if (!string.IsNullOrEmpty(rawValue)) + ConvertCellValue(rawValue, aT, out value); + } + else if (!XmlReaderHelper.SkipContent(reader)) + { + break; + } + } + return value; + } + + private void ConvertCellValue(string rawValue, string aT, out object value) + { + const NumberStyles style = NumberStyles.Any; + var invariantCulture = CultureInfo.InvariantCulture; + + switch (aT) + { + case "s": //// if string + if (int.TryParse(rawValue, style, invariantCulture, out var sstIndex)) + { + if (_SharedStrings.ContainsKey(sstIndex)) + value = _SharedStrings[sstIndex]; + else + value = sstIndex; + return; + } + + value = rawValue; + return; + case "inlineStr": //// if string inline + case "str": //// if cached formula string + value = Helpers.ConvertEscapeChars(rawValue); + return; + case "b": //// boolean + value = rawValue == "1"; + return; + case "d": //// ISO 8601 date + if (DateTime.TryParseExact(rawValue, "yyyy-MM-dd", invariantCulture, DateTimeStyles.AllowLeadingWhite | DateTimeStyles.AllowTrailingWhite, out var date)) + { + value = date; + return; + } + + value = rawValue; + return; + case "e": //// error + value = rawValue; + return; + default: + if (double.TryParse(rawValue, style, invariantCulture, out double number)) + { + value = number; + return; + } + + value = rawValue; + return; + } + } + + private static readonly XmlReaderSettings XmlSettings = new XmlReaderSettings + { + IgnoreComments = true, + IgnoreWhitespace = true, + XmlResolver = null, + }; + } + +} diff --git a/src/MiniExcel/OpenXml/Worksheet.cs b/src/MiniExcel/OpenXml/Worksheet.cs deleted file mode 100644 index fc2865f..0000000 --- a/src/MiniExcel/OpenXml/Worksheet.cs +++ /dev/null @@ -1,10 +0,0 @@ -namespace MiniExcelLibs.OpenXml -{ - using System.Collections.Generic; - internal class Worksheet - { - public int RowCount { get; set; } - public int FieldCount { get; set; } - public Dictionary> Rows { get; set; } - } -} diff --git a/src/MiniExcel/Utils/DateTimeHelper.cs b/src/MiniExcel/Utils/DateTimeHelper.cs new file mode 100644 index 0000000..094c057 --- /dev/null +++ b/src/MiniExcel/Utils/DateTimeHelper.cs @@ -0,0 +1,81 @@ +/* + Code from ExcelDataReader : https://github.com/ExcelDataReader/ExcelDataReader/blob/master/src/ExcelDataReader/Core/Helpers.cs + */ + +namespace MiniExcelLibs.Utils +{ + using System; + + internal static class DateTimeHelper + { + // All OA dates must be greater than (not >=) OADateMinAsDouble + public const double OADateMinAsDouble = -657435.0; + + // All OA dates must be less than (not <=) OADateMaxAsDouble + public const double OADateMaxAsDouble = 2958466.0; + + // From DateTime class to enable OADate in PCL + // Number of 100ns ticks per time unit + private const long TicksPerMillisecond = 10000; + private const long TicksPerSecond = TicksPerMillisecond * 1000; + private const long TicksPerMinute = TicksPerSecond * 60; + private const long TicksPerHour = TicksPerMinute * 60; + private const long TicksPerDay = TicksPerHour * 24; + + // Number of milliseconds per time unit + private const int MillisPerSecond = 1000; + private const int MillisPerMinute = MillisPerSecond * 60; + private const int MillisPerHour = MillisPerMinute * 60; + private const int MillisPerDay = MillisPerHour * 24; + + // Number of days in a non-leap year + private const int DaysPerYear = 365; + + // Number of days in 4 years + private const int DaysPer4Years = DaysPerYear * 4 + 1; + + // Number of days in 100 years + private const int DaysPer100Years = DaysPer4Years * 25 - 1; + + // Number of days in 400 years + private const int DaysPer400Years = DaysPer100Years * 4 + 1; + + // Number of days from 1/1/0001 to 12/30/1899 + private const int DaysTo1899 = DaysPer400Years * 4 + DaysPer100Years * 3 - 367; + + // Number of days from 1/1/0001 to 12/31/9999 + private const int DaysTo10000 = DaysPer400Years * 25 - 366; + + private const long MaxMillis = (long)DaysTo10000 * MillisPerDay; + + private const long DoubleDateOffset = DaysTo1899 * TicksPerDay; + + public static DateTime FromOADate(double d) + { + return new DateTime(DoubleDateToTicks(d), DateTimeKind.Unspecified); + } + + // duplicated from DateTime + internal static long DoubleDateToTicks(double value) + { + if (value >= OADateMaxAsDouble || value <= OADateMinAsDouble) + throw new ArgumentException("Invalid OA Date"); + long millis = (long)(value * MillisPerDay + (value >= 0 ? 0.5 : -0.5)); + + // The interesting thing here is when you have a value like 12.5 it all positive 12 days and 12 hours from 01/01/1899 + // However if you a value of -12.25 it is minus 12 days but still positive 6 hours, almost as though you meant -11.75 all negative + // This line below fixes up the millis in the negative case + if (millis < 0) + { + millis -= millis % MillisPerDay * 2; + } + + millis += DoubleDateOffset / TicksPerMillisecond; + + if (millis < 0 || millis >= MaxMillis) + throw new ArgumentException("OA Date out of range"); + return millis * TicksPerMillisecond; + } + } + +} diff --git a/src/MiniExcel/Utils/Helpers.cs b/src/MiniExcel/Utils/Helpers.cs new file mode 100644 index 0000000..a2043c2 --- /dev/null +++ b/src/MiniExcel/Utils/Helpers.cs @@ -0,0 +1,73 @@ +namespace MiniExcelLibs.Utils +{ + using System; + using System.Collections.Generic; + using System.Dynamic; + using System.Globalization; + using System.Text.RegularExpressions; + + internal static class Helpers + { + private static readonly Regex EscapeRegex = new Regex("_x([0-9A-F]{4,4})_"); + + public static IDictionary GetEmptyExpandoObject(int maxColumnIndex) + { + // TODO: strong type mapping can ignore this + // TODO: it can recode better performance + var cell = (IDictionary)new ExpandoObject(); + for (int i = 0; i <= maxColumnIndex; i++) + cell.Add(i.ToString(), DBNull.Value); + return cell; + } + + public static IDictionary GetEmptyExpandoObject(Dictionary hearrows) + { + // TODO: strong type mapping can ignore this + // TODO: it can recode better performance + var cell = (IDictionary)new ExpandoObject(); + foreach (var hr in hearrows) + cell.Add(hr.Value, DBNull.Value); + return cell; + } + + + public static string ConvertEscapeChars(string input) + { + return EscapeRegex.Replace(input, m => ((char)uint.Parse(m.Groups[1].Value, NumberStyles.HexNumber)).ToString()); + } + + /// + /// Convert a double from Excel to an OA DateTime double. + /// The returned value is normalized to the '1900' date mode and adjusted for the 1900 leap year bug. + /// + public static double AdjustOADateTime(double value, bool date1904) + { + if (!date1904) + { + // Workaround for 1900 leap year bug in Excel + if (value >= 0.0 && value < 60.0) + return value + 1; + } + else + { + return value + 1462.0; + } + + return value; + } + + public static bool IsValidOADateTime(double value) + { + return value > DateTimeHelper.OADateMinAsDouble && value < DateTimeHelper.OADateMaxAsDouble; + } + + public static object ConvertFromOATime(double value, bool date1904) + { + var dateValue = AdjustOADateTime(value, date1904); + if (IsValidOADateTime(dateValue)) + return DateTimeHelper.FromOADate(dateValue); + return value; + } + } + +} diff --git a/src/MiniExcel/Utils/ReferenceHelper.cs b/src/MiniExcel/Utils/ReferenceHelper.cs new file mode 100644 index 0000000..58322bc --- /dev/null +++ b/src/MiniExcel/Utils/ReferenceHelper.cs @@ -0,0 +1,56 @@ +namespace MiniExcelLibs.Utils +{ + using System.Globalization; + + internal static class ReferenceHelper + { + /// + /// Logic for the Excel dimensions. Ex: A15 + /// + /// The value. + /// The column, 1-based. + /// The row, 1-based. + public static bool ParseReference(string value, out int column, out int row) + { + column = 0; + var position = 0; + const int offset = 'A' - 1; + + if (value != null) + { + while (position < value.Length) + { + var c = value[position]; + if (c >= 'A' && c <= 'Z') + { + position++; + column *= 26; + column += c - offset; + continue; + } + + if (char.IsDigit(c)) + break; + + position = 0; + break; + } + } + + if (position == 0) + { + column = 0; + row = 0; + return false; + } + + if (!int.TryParse(value.Substring(position), NumberStyles.None, CultureInfo.InvariantCulture, out row)) + { + return false; + } + + return true; + } + } + +} diff --git a/src/MiniExcel/Utils/StringHelper.cs b/src/MiniExcel/Utils/StringHelper.cs new file mode 100644 index 0000000..f77db1e --- /dev/null +++ b/src/MiniExcel/Utils/StringHelper.cs @@ -0,0 +1,59 @@ +namespace MiniExcelLibs.Utils +{ + using System.Xml; + + internal static class StringHelper + { + public static string ReadStringItem(XmlReader reader) + { + string result = string.Empty; + if (!XmlReaderHelper.ReadFirstContent(reader)) + { + return result; + } + + while (!reader.EOF) + { + if (reader.IsStartElement("t", "http://schemas.openxmlformats.org/spreadsheetml/2006/main")) + { + // There are multiple in a . Concatenate within an . + result += reader.ReadElementContentAsString(); + } + else if (reader.IsStartElement("r", "http://schemas.openxmlformats.org/spreadsheetml/2006/main")) + { + result += ReadRichTextRun(reader); + } + else if (!XmlReaderHelper.SkipContent(reader)) + { + break; + } + } + + return result; + } + + private static string ReadRichTextRun(XmlReader reader) + { + string result = string.Empty; + if (!XmlReaderHelper.ReadFirstContent(reader)) + { + return result; + } + + while (!reader.EOF) + { + if (reader.IsStartElement("t", "http://schemas.openxmlformats.org/spreadsheetml/2006/main")) + { + result += reader.ReadElementContentAsString(); + } + else if (!XmlReaderHelper.SkipContent(reader)) + { + break; + } + } + + return result; + } + } + +} diff --git a/src/MiniExcel/Utils/XmlReaderHelper.cs b/src/MiniExcel/Utils/XmlReaderHelper.cs new file mode 100644 index 0000000..bc636cd --- /dev/null +++ b/src/MiniExcel/Utils/XmlReaderHelper.cs @@ -0,0 +1,33 @@ +namespace MiniExcelLibs.Utils +{ + using System.Xml; + + internal static class XmlReaderHelper + { + public static bool ReadFirstContent(XmlReader xmlReader) + { + if (xmlReader.IsEmptyElement) + { + xmlReader.Read(); + return false; + } + + xmlReader.MoveToContent(); + xmlReader.Read(); + return true; + } + + public static bool SkipContent(XmlReader xmlReader) + { + if (xmlReader.NodeType == XmlNodeType.EndElement) + { + xmlReader.Read(); + return false; + } + + xmlReader.Skip(); + return true; + } + } + +} diff --git a/src/MiniExcelTests/MiniExcelHelperTests.cs b/src/MiniExcelTests/MiniExcelHelperTests.cs index df99180..623f701 100644 --- a/src/MiniExcelTests/MiniExcelHelperTests.cs +++ b/src/MiniExcelTests/MiniExcelHelperTests.cs @@ -1,15 +1,14 @@ using Xunit; -using MiniExcelLibs; using System; -using System.Collections.Generic; using System.Linq; -using System.Text; -using System.Threading.Tasks; using System.IO; using OfficeOpenXml; using ClosedXML.Excel; using System.IO.Packaging; using System.Data; +using ExcelDataReader; +using System.Collections.Generic; +using System.Dynamic; namespace MiniExcelLibs.Tests { @@ -22,50 +21,42 @@ namespace MiniExcelLibs.Tests using (var stream = File.OpenRead(path)) { var rows = stream.Query(); + foreach (var item in rows) + { + + } + } + } - Assert.Equal("a", rows[0][0]); - Assert.Equal("b", rows[0][1]); - Assert.Equal("c", rows[0][2]); - Assert.Equal("d", rows[0][3]); + [Fact()] + public void QueryExcelDataReaderCheckTest() + { +#if NETCOREAPP3_1 || NET5_0 + System.Text.Encoding.RegisterProvider(System.Text.CodePagesEncodingProvider.Instance); +#endif + var path = @"..\..\..\..\..\samples\xlsx\TestCenterEmptyRow\TestCenterEmptyRow.xlsx"; - Assert.Equal("1", rows[1][0]); - Assert.Null(rows[1][1]); - Assert.Equal("3", rows[1][2]); - Assert.Null(rows[1][3]); - - Assert.Null(rows[2][0]); - Assert.Equal("2", rows[2][1]); - Assert.Null(rows[2][2]); - Assert.Equal("4", rows[2][3]); - - Assert.Null(rows[3][0]); - Assert.Null(rows[3][1]); - Assert.Null(rows[3][2]); - Assert.Null(rows[3][3]); + DataSet exceldatareaderResult; + using (var stream = File.OpenRead(path)) + using (var reader = ExcelReaderFactory.CreateReader(stream)) + { + exceldatareaderResult = reader.AsDataSet(); } + using (var stream = File.OpenRead(path)) { - var rows = MiniExcel.Query(path); - - Assert.Equal("a", rows[0][0]); - Assert.Equal("b", rows[0][1]); - Assert.Equal("c", rows[0][2]); - Assert.Equal("d", rows[0][3]); - - Assert.Equal("1", rows[1][0]); - Assert.Null(rows[1][1]); - Assert.Equal("3", rows[1][2]); - Assert.Null(rows[1][3]); - - Assert.Null(rows[2][0]); - Assert.Equal("2", rows[2][1]); - Assert.Null(rows[2][2]); - Assert.Equal("4", rows[2][3]); - - Assert.Null(rows[3][0]); - Assert.Null(rows[3][1]); - Assert.Null(rows[3][2]); - Assert.Null(rows[3][3]); + var rows = stream.Query().ToList(); + foreach (IDictionary row in rows) + { + var rowIndex = rows.IndexOf(row); + var keys = row.Keys; + foreach (var key in keys) + { + var eV = exceldatareaderResult.Tables[0].Rows[rowIndex][int.Parse(key)]; + var v = row[key]; + Assert.Equal(eV, v); + } + } } } diff --git a/src/MiniExcelTests/MiniExcelTests.csproj b/src/MiniExcelTests/MiniExcelTests.csproj index 9eb2e12..346ca96 100644 --- a/src/MiniExcelTests/MiniExcelTests.csproj +++ b/src/MiniExcelTests/MiniExcelTests.csproj @@ -15,6 +15,8 @@ + + all