diff --git a/README.md b/README.md index 0bdfedf..4fe6a63 100644 --- a/README.md +++ b/README.md @@ -259,6 +259,10 @@ foreach(IDictionary row in MiniExcel.Query(path)) // or var rows = MiniExcel.Query(path).Cast>(); +// or Query specified ranges (capitalized) +// A2 represents the second row of column A, C3 represents the third row of column C +// If you don't want to restrict rows, just don't include numbers +var rows = MiniExcel.QueryRange(path, startCell: "A2", endCell: "C3").Cast>(); ``` diff --git a/README.zh-CN.md b/README.zh-CN.md index 092f9a0..8b37f49 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -266,6 +266,10 @@ foreach(IDictionary row in MiniExcel.Query(path)) // or var rows = MiniExcel.Query(path).Cast>(); +// or 查询指定范围(要大写才生效哦) +// A2(左上角)代表A列的第二行,C3(右下角)代表C列的第三行 +// 如果你不想限制行,就不要包含数字 +var rows = MiniExcel.QueryRange(path, startCell: "A2", endCell: "C3").Cast>(); ``` #### 9. Query 读 Excel 返回 DataTable diff --git a/README.zh-Hant.md b/README.zh-Hant.md index f0eddf3..d0f043d 100644 --- a/README.zh-Hant.md +++ b/README.zh-Hant.md @@ -265,6 +265,10 @@ foreach(IDictionary row in MiniExcel.Query(path)) // or var rows = MiniExcel.Query(path).Cast>(); +// or 査詢指定範圍(要大寫才生效哦) +// A2(左上角)代表A列的第二行,C3(右下角)代表C列的第三行 +// 如果你不想限制行,就不要包含數位 +var rows = MiniExcel.QueryRange(path, startCell: "A2", endCell: "C3").Cast>(); ``` diff --git a/samples/xlsx/TestIssue606_Template.xlsx b/samples/xlsx/TestIssue606_Template.xlsx new file mode 100644 index 0000000..bc3c787 Binary files /dev/null and b/samples/xlsx/TestIssue606_Template.xlsx differ diff --git a/src/MiniExcel/MiniExcel.cs b/src/MiniExcel/MiniExcel.cs index abf5649..c508f65 100644 --- a/src/MiniExcel/MiniExcel.cs +++ b/src/MiniExcel/MiniExcel.cs @@ -91,6 +91,21 @@ #region range + /// + /// Extract the given range。 Only uppercase letters are effective。 + /// e.g. + /// MiniExcel.QueryRange(path, startCell: "A2", endCell: "C3") + /// A2 represents the second row of column A, C3 represents the third row of column C + /// If you don't want to restrict rows, just don't include numbers + /// + /// + /// + /// + /// + /// top left corner + /// lower right corner + /// + /// public static IEnumerable QueryRange(string path, bool useHeaderRow = false, string sheetName = null, ExcelType excelType = ExcelType.UNKNOWN, string startCell = "a1", string endCell = "", IConfiguration configuration = null) { using (var stream = FileHelper.OpenSharedRead(path)) diff --git a/src/MiniExcel/OpenXml/Config.cs b/src/MiniExcel/OpenXml/Config.cs index fed5318..fd0102b 100644 --- a/src/MiniExcel/OpenXml/Config.cs +++ b/src/MiniExcel/OpenXml/Config.cs @@ -6,5 +6,6 @@ public const string SpreadsheetmlXmlStrictns = "http://purl.oclc.org/ooxml/spreadsheetml/main"; public const string SpreadsheetmlXmlRelationshipns = "http://schemas.openxmlformats.org/officeDocument/2006/relationships"; public const string SpreadsheetmlXmlStrictRelationshipns = "http://purl.oclc.org/ooxml/officeDocument/relationships"; + public const string SpreadsheetmlXml_x14ac = "http://schemas.microsoft.com/office/spreadsheetml/2009/9/ac"; } } \ No newline at end of file diff --git a/src/MiniExcel/OpenXml/ExcelOpenXmlTemplate.Impl.cs b/src/MiniExcel/OpenXml/ExcelOpenXmlTemplate.Impl.cs index d3a355d..187382a 100644 --- a/src/MiniExcel/OpenXml/ExcelOpenXmlTemplate.Impl.cs +++ b/src/MiniExcel/OpenXml/ExcelOpenXmlTemplate.Impl.cs @@ -105,7 +105,9 @@ namespace MiniExcelLibs.OpenXml } } - private List XRowInfos { get; set; } + private List XRowInfos { get; set; } + + private readonly List CalcChainCellRefs = new List(); private Dictionary XMergeCellInfos { get; set; } public List NewXMergeCellInfos { get; private set; } @@ -688,7 +690,10 @@ namespace MiniExcelLibs.OpenXml var mergeBaseRowIndex = newRowIndex; newRowIndex += rowInfo.IEnumerableMercell?.Height ?? 1; - writer.Write(CleanXml(rowXml, endPrefix)); // pass StringBuilder for netcoreapp3.0 or above + + // replace formulas + ProcessFormulas( rowXml, newRowIndex ); + writer.Write(CleanXml( rowXml, endPrefix)); // pass StringBuilder for netcoreapp3.0 or above //mergecells if (rowInfo.RowMercells != null) @@ -743,30 +748,6 @@ namespace MiniExcelLibs.OpenXml else { - // convert cells starting with '$=' into formulas - var cs = row.SelectNodes($"x:c", _ns); - foreach (XmlElement c in cs) - { - /* Target: - - SUM(C2:C7) - - */ - var vs = c.SelectNodes($"x:v", _ns); - foreach (XmlElement v in vs) - { - if (!v.InnerText.StartsWith("$=")) - { - continue; - } - var fNode = c.OwnerDocument.CreateElement("f", Config.SpreadsheetmlXmlns); - fNode.InnerText = v.InnerText.Substring(2); - c.InsertBefore(fNode, v); - c.RemoveChild(v); - } - } - innerXml = row.InnerXml; - rowXml.Clear() .Append(outerXmlOpen) .AppendFormat(@" r=""{0}"">", newRowIndex) @@ -775,7 +756,10 @@ namespace MiniExcelLibs.OpenXml .Replace($"{{{{$enumrowstart}}}}", enumrowstart.ToString()) .Replace($"{{{{$enumrowend}}}}", enumrowend.ToString()) .AppendFormat("", row.Name); - writer.Write(CleanXml(rowXml, endPrefix)); // pass StringBuilder for netcoreapp3.0 or above + + ProcessFormulas( rowXml, newRowIndex ); + + writer.Write(CleanXml( rowXml, endPrefix)); // pass StringBuilder for netcoreapp3.0 or above //mergecells if (rowInfo.RowMercells != null) @@ -810,6 +794,59 @@ namespace MiniExcelLibs.OpenXml writer.Write(contents[1]); } } + + private void ProcessFormulas( StringBuilder rowXml, int rowIndex ) + { + + var rowXmlString = rowXml.ToString(); + + // exit early if possible + if ( !rowXmlString.Contains( "$=" ) ) { + return; + } + + XmlReaderSettings settings = new XmlReaderSettings { NameTable = _ns.NameTable }; + XmlParserContext context = new XmlParserContext( null, _ns, "", XmlSpace.Default ); + XmlReader reader = XmlReader.Create( new StringReader( rowXmlString ), settings, context ); + + XmlDocument d = new XmlDocument(); + d.Load( reader ); + + var row = d.FirstChild as XmlElement; + + // convert cells starting with '$=' into formulas + var cs = row.SelectNodes( $"x:c", _ns ); + for ( var ci = 0; ci < cs.Count; ci++ ) + { + var c = cs.Item( ci ) as XmlElement; + if ( c == null ) { + continue; + } + /* Target: + + SUM(C2:C7) + + */ + var vs = c.SelectNodes( $"x:v", _ns ); + foreach ( XmlElement v in vs ) + { + if ( !v.InnerText.StartsWith( "$=" ) ) + { + continue; + } + var fNode = c.OwnerDocument.CreateElement( "f", Config.SpreadsheetmlXmlns ); + fNode.InnerText = v.InnerText.Substring( 2 ); + c.InsertBefore( fNode, v ); + c.RemoveChild( v ); + + var celRef = ExcelOpenXmlUtils.ConvertXyToCell( ci + 1, rowIndex ); + CalcChainCellRefs.Add( celRef ); + + } + } + rowXml.Clear(); + rowXml.Append( row.OuterXml ); + } private static string ConvertToDateTimeString(KeyValuePair propInfo, object cellValue) { diff --git a/src/MiniExcel/OpenXml/ExcelOpenXmlTemplate.cs b/src/MiniExcel/OpenXml/ExcelOpenXmlTemplate.cs index c2a38ea..d290695 100644 --- a/src/MiniExcel/OpenXml/ExcelOpenXmlTemplate.cs +++ b/src/MiniExcel/OpenXml/ExcelOpenXmlTemplate.cs @@ -25,7 +25,8 @@ namespace MiniExcelLibs.OpenXml _isExpressionRegex = new Regex("(?<={{).*?(?=}})"); _ns = new XmlNamespaceManager(new NameTable()); _ns.AddNamespace("x", Config.SpreadsheetmlXmlns); - } + _ns.AddNamespace( "x14ac", Config.SpreadsheetmlXml_x14ac ); + } private readonly Stream _stream; private readonly OpenXmlConfiguration _configuration; @@ -118,10 +119,7 @@ namespace MiniExcelLibs.OpenXml using (var filledStream = entry.Open()) { sheetIdx++; - var filledDoc = new XmlDocument(); - filledDoc.Load(filledStream); - var filledSheetData = filledDoc.SelectSingleNode("/x:worksheet/x:sheetData", _ns); - _calcChainContent.Append(CalcChainHelper.GetCalcChainContentFromSheet(filledSheetData, _ns, sheetIdx)); + _calcChainContent.Append( CalcChainHelper.GetCalcChainContent( CalcChainCellRefs, sheetIdx ) ); } } diff --git a/src/MiniExcel/Utils/calChainHelper.cs b/src/MiniExcel/Utils/calChainHelper.cs index 6747c7a..1d40c30 100644 --- a/src/MiniExcel/Utils/calChainHelper.cs +++ b/src/MiniExcel/Utils/calChainHelper.cs @@ -1,6 +1,6 @@ -using System.IO; +using System.Collections.Generic; +using System.IO; using System.Text; -using System.Xml; namespace MiniExcelLibs.Utils { @@ -13,22 +13,18 @@ namespace MiniExcelLibs.Utils // Each element should have a r attribute that specifies the cell's address (e.g., "A1" or "B2"). // The element should also have a i attribute that specifies the index of the formula in the formulas collection (in the workbook's sheet data file). // https://learn.microsoft.com/en-us/dotnet/api/documentformat.openxml.spreadsheet.calculationchain?view=openxml-2.8.1 - public static string GetCalcChainContentFromSheet(in XmlNode sheetData, XmlNamespaceManager ns, int sheetIndex) - { + public static string GetCalcChainContent( List cellRefs, int sheetIndex ) { - StringBuilder calcChainContent = new StringBuilder(); + StringBuilder calcChainContent = new StringBuilder(); - // each c having f nodes - var cs = sheetData.SelectNodes($"x:row/x:c[./x:f]", ns); - foreach (XmlElement c in cs) - { - calcChainContent.Append($@""); - } + foreach ( string cr in cellRefs ) { + calcChainContent.Append( $@"" ); + } - return calcChainContent.ToString(); - } + return calcChainContent.ToString(); + } - public static void GenerateCalcChainSheet(Stream calcChainStream, string calcChainContent) + public static void GenerateCalcChainSheet(Stream calcChainStream, string calcChainContent) { using (var writer = new StreamWriter(calcChainStream, Encoding.UTF8)) { diff --git a/tests/MiniExcelTests/MiniExcelIssueTests.cs b/tests/MiniExcelTests/MiniExcelIssueTests.cs index 8d16a7b..e849293 100644 --- a/tests/MiniExcelTests/MiniExcelIssueTests.cs +++ b/tests/MiniExcelTests/MiniExcelIssueTests.cs @@ -1,4 +1,4 @@ -using Dapper; +using Dapper; using MiniExcelLibs.Attributes; using MiniExcelLibs.Csv; using MiniExcelLibs.Exceptions; @@ -3641,5 +3641,56 @@ MyProperty4,MyProperty1,MyProperty5,MyProperty2,MyProperty6,,MyProperty3 Assert.Equal(2, getRowsInfo.Length ); } + + + [Fact] + public void Issue606_1() + { + // excel max rows: 1,048,576 + // before changes: 1999999 => 25.8 GB mem + // after changes: 1999999 => peaks at 3.2 GB mem (10:20 min) + // after changes: 100000 => peaks at 222 MB mem (34 sec) + + var value = new + { + Title = "My Title", + OrderInfo = Enumerable + .Range( 1, 100 ) + .Select( x => new + { + Standard = "standard", + RegionName = "region", + DealerName = "department", + SalesPointName = "region", + CustomerName = "customer", + IdentityType = "aaaaaa", + IdentitySeries = "ssssss", + IdentityNumber = "nnnnn", + BirthDate = "date", + TariffPlanName = "plan", + PhoneNumber = "num", + SimCardIcc = "sss", + BisContractNumber = "eee", + CreatedAt = "dd.mm.yyyy", + UserDescription = "fhtyrhthrthrt", + UserName = "dfsfsdfds", + PaymentsAmount = "dfhgdfgadfgdfg", + OrderState = "agafgdafgadfgd", + }) + }; + + var path = Path.Combine + ( + Path.GetTempPath(), + string.Concat( nameof( MiniExcelIssueTests ), "_", nameof( Issue606_1 ), ".xlsx" ) + ); + + var templateFileName = @"../../../../../samples/xlsx/TestIssue606_Template.xlsx"; + + + MiniExcel.SaveAsByTemplate( path, Path.GetFullPath( templateFileName ), value ); + + } + } } \ No newline at end of file diff --git a/tests/MiniExcelTests/MiniExcelOpenXmlTests.cs b/tests/MiniExcelTests/MiniExcelOpenXmlTests.cs index 5155b3c..6afa983 100644 --- a/tests/MiniExcelTests/MiniExcelOpenXmlTests.cs +++ b/tests/MiniExcelTests/MiniExcelOpenXmlTests.cs @@ -166,6 +166,21 @@ namespace MiniExcelLibs.Tests } } + + [Fact] + public void QueryRangeToIDictionary() + { + var path = @"../../../../../samples/xlsx/TestCenterEmptyRow/TestCenterEmptyRow.xlsx"; + // tips:Only uppercase letters are effective + var rows = MiniExcel.QueryRange(path, startCell: "A2", endCell: "C") + .Cast>() + .ToList(); + Assert.Equal(5, rows.Count); + Assert.Equal(3, rows[0].Count); + Assert.Equal(2d, rows[1]["B"]); + Assert.Equal(null!, rows[2]["A"]); + } + [Fact()] public void CenterEmptyRowsQueryTest() {