From 912a16b911659780f13429c8cf777f05d6dcf7a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BD=97=E5=A8=81?= Date: Tue, 8 Mar 2022 20:32:05 +0800 Subject: [PATCH] =?UTF-8?q?=E6=94=AF=E6=8C=81Strict=20Open=20Xml?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../OpenXml/ExcelOpenXmlSheetReader.cs | 65 +++++++------------ src/MiniExcel/OpenXml/ExcelOpenXmlStyles.cs | 16 ++--- src/MiniExcel/Utils/StringHelper.cs | 8 +-- src/MiniExcel/Utils/XmlReaderHelper.cs | 23 ++++++- tests/MiniExcelTests/MiniExcelOpenXmlTests.cs | 8 +++ 5 files changed, 65 insertions(+), 55 deletions(-) diff --git a/src/MiniExcel/OpenXml/ExcelOpenXmlSheetReader.cs b/src/MiniExcel/OpenXml/ExcelOpenXmlSheetReader.cs index 3b3b46f..a7d1472 100644 --- a/src/MiniExcel/OpenXml/ExcelOpenXmlSheetReader.cs +++ b/src/MiniExcel/OpenXml/ExcelOpenXmlSheetReader.cs @@ -74,17 +74,17 @@ namespace MiniExcelLibs.OpenXml using (var sheetStream = sheetEntry.Open()) using (XmlReader reader = XmlReader.Create(sheetStream, _xmlSettings)) { - if (!IsStartElement(reader, "worksheet", _ns)) + if (!XmlReaderHelper.IsStartElement(reader, "worksheet", _ns)) yield break; while (reader.Read()) { - if (IsStartElement(reader, "mergeCells", _ns)) + if (XmlReaderHelper.IsStartElement(reader, "mergeCells", _ns)) { if (!XmlReaderHelper.ReadFirstContent(reader)) yield break; while (!reader.EOF) { - if (IsStartElement(reader, "mergeCell", _ns)) + if (XmlReaderHelper.IsStartElement(reader, "mergeCell", _ns)) { var @ref = reader.GetAttribute("ref"); var refs = @ref.Split(':'); @@ -135,7 +135,7 @@ namespace MiniExcelLibs.OpenXml { while (reader.Read()) { - if (IsStartElement(reader, "c", _ns)) + if (XmlReaderHelper.IsStartElement(reader, "c", _ns)) { var r = reader.GetAttribute("r"); if (r != null) @@ -155,7 +155,7 @@ namespace MiniExcelLibs.OpenXml } } //this method logic depends on dimension to get maxcolumnIndex, if without dimension then it need to foreach all rows first time to get maxColumn and maxRowColumn - else if (IsStartElement(reader, "dimension", _ns)) + else if (XmlReaderHelper.IsStartElement(reader, "dimension", _ns)) { var @ref = reader.GetAttribute("ref"); if (string.IsNullOrEmpty(@ref)) @@ -179,20 +179,20 @@ namespace MiniExcelLibs.OpenXml using (var sheetStream = sheetEntry.Open()) using (XmlReader reader = XmlReader.Create(sheetStream, _xmlSettings)) { - if (!IsStartElement(reader, "worksheet", _ns)) + if (!XmlReaderHelper.IsStartElement(reader, "worksheet", _ns)) yield break; if (!XmlReaderHelper.ReadFirstContent(reader)) yield break; while (!reader.EOF) { - if (IsStartElement(reader, "sheetData", _ns)) + if (XmlReaderHelper.IsStartElement(reader, "sheetData", _ns)) { if (!XmlReaderHelper.ReadFirstContent(reader)) continue; while (!reader.EOF) { - if (IsStartElement(reader, "row", _ns)) + if (XmlReaderHelper.IsStartElement(reader, "row", _ns)) { maxRowIndex++; @@ -204,7 +204,7 @@ namespace MiniExcelLibs.OpenXml var cellIndex = -1; while (!reader.EOF) { - if (IsStartElement(reader, "c", _ns)) + if (XmlReaderHelper.IsStartElement(reader, "c", _ns)) { cellIndex++; maxColumnIndex = Math.Max(maxColumnIndex, cellIndex); @@ -237,7 +237,7 @@ namespace MiniExcelLibs.OpenXml using (var sheetStream = sheetEntry.Open()) using (XmlReader reader = XmlReader.Create(sheetStream, _xmlSettings)) { - if (!IsStartElement(reader, "worksheet", _ns)) + if (!XmlReaderHelper.IsStartElement(reader, "worksheet", _ns)) yield break; if (!XmlReaderHelper.ReadFirstContent(reader)) @@ -245,7 +245,7 @@ namespace MiniExcelLibs.OpenXml while (!reader.EOF) { - if (IsStartElement(reader, "sheetData", _ns)) + if (XmlReaderHelper.IsStartElement(reader, "sheetData", _ns)) { if (!XmlReaderHelper.ReadFirstContent(reader)) continue; @@ -256,7 +256,7 @@ namespace MiniExcelLibs.OpenXml bool isFirstRow = true; while (!reader.EOF) { - if (IsStartElement(reader, "row", _ns)) + if (XmlReaderHelper.IsStartElement(reader, "row", _ns)) { nextRowIndex = rowIndex + 1; if (int.TryParse(reader.GetAttribute("r"), out int arValue)) @@ -294,7 +294,7 @@ namespace MiniExcelLibs.OpenXml var columnIndex = withoutCR ? -1 : 0; while (!reader.EOF) { - if (IsStartElement(reader, "c", _ns)) + if (XmlReaderHelper.IsStartElement(reader, "c", _ns)) { var aS = reader.GetAttribute("s"); var aR = reader.GetAttribute("r"); @@ -477,7 +477,7 @@ namespace MiniExcelLibs.OpenXml { using (var reader = XmlReader.Create(stream)) { - if (!IsStartElement(reader, "sst", _ns)) + if (!XmlReaderHelper.IsStartElement(reader, "sst", _ns)) yield break; if (!XmlReaderHelper.ReadFirstContent(reader)) @@ -485,7 +485,7 @@ namespace MiniExcelLibs.OpenXml while (!reader.EOF) { - if (IsStartElement(reader, "si", _ns)) + if (XmlReaderHelper.IsStartElement(reader, "si", _ns)) { var value = StringHelper.ReadStringItem(reader); yield return value; @@ -510,7 +510,7 @@ namespace MiniExcelLibs.OpenXml using (var stream = entries.Single(w => w.FullName == "xl/workbook.xml").Open()) using (XmlReader reader = XmlReader.Create(stream, _xmlSettings)) { - if (!IsStartElement(reader, "workbook", _ns)) + if (!XmlReaderHelper.IsStartElement(reader, "workbook", _ns)) yield break; if (!XmlReaderHelper.ReadFirstContent(reader)) @@ -518,19 +518,19 @@ namespace MiniExcelLibs.OpenXml while (!reader.EOF) { - if (IsStartElement(reader, "sheets", _ns)) + if (XmlReaderHelper.IsStartElement(reader, "sheets", _ns)) { if (!XmlReaderHelper.ReadFirstContent(reader)) continue; while (!reader.EOF) { - if (IsStartElement(reader, "sheet", _ns)) + if (XmlReaderHelper.IsStartElement(reader, "sheet", _ns)) { yield return new SheetRecord( reader.GetAttribute("name"), uint.Parse(reader.GetAttribute("sheetId")), - GetAttribute(reader, "id", _relationshiopNs) + XmlReaderHelper.GetAttribute(reader, "id", _relationshiopNs) ); reader.Skip(); } @@ -555,7 +555,7 @@ namespace MiniExcelLibs.OpenXml using (var stream = entries.Single(w => w.FullName == "xl/_rels/workbook.xml.rels").Open()) using (XmlReader reader = XmlReader.Create(stream, _xmlSettings)) { - if (!IsStartElement(reader, "Relationships", "http://schemas.openxmlformats.org/package/2006/relationships")) + if (!XmlReaderHelper.IsStartElement(reader, "Relationships", "http://schemas.openxmlformats.org/package/2006/relationships")) return null; if (!XmlReaderHelper.ReadFirstContent(reader)) @@ -563,7 +563,7 @@ namespace MiniExcelLibs.OpenXml while (!reader.EOF) { - if (IsStartElement(reader, "Relationship", "http://schemas.openxmlformats.org/package/2006/relationships")) + if (XmlReaderHelper.IsStartElement(reader, "Relationship", "http://schemas.openxmlformats.org/package/2006/relationships")) { string rid = reader.GetAttribute("Id"); foreach (var sheet in sheetRecords) @@ -618,13 +618,13 @@ namespace MiniExcelLibs.OpenXml object value = null; while (!reader.EOF) { - if (IsStartElement(reader, "v", _ns)) + if (XmlReaderHelper.IsStartElement(reader, "v", _ns)) { string rawValue = reader.ReadElementContentAsString(); if (!string.IsNullOrEmpty(rawValue)) ConvertCellValue(rawValue, aT, xfIndex, out value); } - else if (IsStartElement(reader, "is", _ns)) + else if (XmlReaderHelper.IsStartElement(reader, "is", _ns)) { string rawValue = StringHelper.ReadStringItem(reader); if (!string.IsNullOrEmpty(rawValue)) @@ -726,24 +726,5 @@ namespace MiniExcelLibs.OpenXml { return Task.Run(() => Query(sheetName, startCell)); } - - private bool IsStartElement(XmlReader reader, string name, params string[] nss) - { - return nss.Any(s => reader.IsStartElement(name, s)); - } - - private string GetAttribute(XmlReader reader, string name, params string[] nss) - { - foreach (var ns in nss) - { - var attribute = reader.GetAttribute(name, ns); - if (attribute != null) - { - return attribute; - } - } - - return null; - } } } diff --git a/src/MiniExcel/OpenXml/ExcelOpenXmlStyles.cs b/src/MiniExcel/OpenXml/ExcelOpenXmlStyles.cs index 9329d2e..09d0137 100644 --- a/src/MiniExcel/OpenXml/ExcelOpenXmlStyles.cs +++ b/src/MiniExcel/OpenXml/ExcelOpenXmlStyles.cs @@ -7,7 +7,7 @@ internal class ExcelOpenXmlStyles { - private const string _ns = Config.SpreadsheetmlXmlns; + private static readonly string[] _ns = { Config.SpreadsheetmlXmlns, Config.SpreadsheetmlXmlStrictns }; private readonly Dictionary _cellXfs = new Dictionary(); private readonly Dictionary _cellStyleXfs = new Dictionary(); private readonly Dictionary _customFormats = new Dictionary(); @@ -16,13 +16,13 @@ { using (var Reader = zip.GetXmlReader(@"xl/styles.xml")) { - if (!Reader.IsStartElement("styleSheet", _ns)) + if (!XmlReaderHelper.IsStartElement(Reader, "styleSheet", _ns)) return; if (!XmlReaderHelper.ReadFirstContent(Reader)) return; while (!Reader.EOF) { - if (Reader.IsStartElement("cellXfs", _ns)) + if (XmlReaderHelper.IsStartElement(Reader,"cellXfs", _ns)) { if (!XmlReaderHelper.ReadFirstContent(Reader)) continue; @@ -30,7 +30,7 @@ var index = 0; while (!Reader.EOF) { - if (Reader.IsStartElement("xf", _ns)) + if (XmlReaderHelper.IsStartElement(Reader,"xf", _ns)) { int.TryParse(Reader.GetAttribute("xfId"), out var xfId); int.TryParse(Reader.GetAttribute("numFmtId"), out var numFmtId); @@ -42,7 +42,7 @@ break; } } - else if (Reader.IsStartElement("cellStyleXfs", _ns)) + else if (XmlReaderHelper.IsStartElement(Reader,"cellStyleXfs", _ns)) { if (!XmlReaderHelper.ReadFirstContent(Reader)) continue; @@ -50,7 +50,7 @@ var index = 0; while (!Reader.EOF) { - if (Reader.IsStartElement("xf", _ns)) + if (XmlReaderHelper.IsStartElement(Reader,"xf", _ns)) { int.TryParse(Reader.GetAttribute("xfId"), out var xfId); int.TryParse(Reader.GetAttribute("numFmtId"), out var numFmtId); @@ -63,14 +63,14 @@ break; } } - else if (Reader.IsStartElement("numFmts", _ns)) + else if (XmlReaderHelper.IsStartElement(Reader,"numFmts", _ns)) { if (!XmlReaderHelper.ReadFirstContent(Reader)) continue; while (!Reader.EOF) { - if (Reader.IsStartElement("numFmt", _ns)) + if (XmlReaderHelper.IsStartElement(Reader,"numFmt", _ns)) { int.TryParse(Reader.GetAttribute("numFmtId"), out var numFmtId); var formatCode = Reader.GetAttribute("formatCode"); diff --git a/src/MiniExcel/Utils/StringHelper.cs b/src/MiniExcel/Utils/StringHelper.cs index 5009349..da5064f 100644 --- a/src/MiniExcel/Utils/StringHelper.cs +++ b/src/MiniExcel/Utils/StringHelper.cs @@ -8,7 +8,7 @@ internal static class StringHelper { - private const string _ns = Config.SpreadsheetmlXmlns; + private static readonly string[] _ns = { Config.SpreadsheetmlXmlns, Config.SpreadsheetmlXmlStrictns }; public static string GetLetter(string content) { //TODO:need to chekc @@ -31,12 +31,12 @@ while (!reader.EOF) { - if (reader.IsStartElement("t", _ns)) + if (XmlReaderHelper.IsStartElement(reader,"t", _ns)) { // There are multiple in a . Concatenate within an . result.Append(reader.ReadElementContentAsString()); } - else if (reader.IsStartElement("r", _ns)) + else if (XmlReaderHelper.IsStartElement(reader,"r", _ns)) { result.Append(ReadRichTextRun(reader)); } @@ -60,7 +60,7 @@ while (!reader.EOF) { - if (reader.IsStartElement("t", _ns)) + if (XmlReaderHelper.IsStartElement(reader,"t", _ns)) { result.Append(reader.ReadElementContentAsString()); } diff --git a/src/MiniExcel/Utils/XmlReaderHelper.cs b/src/MiniExcel/Utils/XmlReaderHelper.cs index 575e108..f46baa7 100644 --- a/src/MiniExcel/Utils/XmlReaderHelper.cs +++ b/src/MiniExcel/Utils/XmlReaderHelper.cs @@ -1,4 +1,6 @@ -namespace MiniExcelLibs.Utils +using System.Linq; + +namespace MiniExcelLibs.Utils { using System.Xml; @@ -53,6 +55,25 @@ reader.Skip(); return true; } + + public static bool IsStartElement(XmlReader reader, string name, params string[] nss) + { + return nss.Any(s => reader.IsStartElement(name, s)); + } + + public static string GetAttribute(XmlReader reader, string name, params string[] nss) + { + foreach (var ns in nss) + { + var attribute = reader.GetAttribute(name, ns); + if (attribute != null) + { + return attribute; + } + } + + return null; + } } } diff --git a/tests/MiniExcelTests/MiniExcelOpenXmlTests.cs b/tests/MiniExcelTests/MiniExcelOpenXmlTests.cs index 9c2863f..d9e421a 100644 --- a/tests/MiniExcelTests/MiniExcelOpenXmlTests.cs +++ b/tests/MiniExcelTests/MiniExcelOpenXmlTests.cs @@ -1152,6 +1152,14 @@ namespace MiniExcelLibs.Tests var path = @"../../../../../samples/xlsx/TestStrictOpenXml.xlsx"; var columns = MiniExcel.GetColumns(path); Assert.Equal(new[] { "A", "B", "C" }, columns); + + var rows = MiniExcel.Query(path).ToList(); + Assert.Equal(rows[0].A , "title1"); + Assert.Equal(rows[0].B , "title2"); + Assert.Equal(rows[0].C , "title3"); + Assert.Equal(rows[1].A , "value1"); + Assert.Equal(rows[1].B , "value2"); + Assert.Equal(rows[1].C , "value3"); } } } \ No newline at end of file