Finish : [Query Support Sheet Xml c without r · Issue #2 · shps951023/MiniExcel](https://github.com/shps951023/MiniExcel/issues/2)

This commit is contained in:
wei 2021-03-16 20:12:04 +08:00
parent 0deae6c6bf
commit f3c9caa777
7 changed files with 3198 additions and 75 deletions

View File

@ -0,0 +1,126 @@
<Query Kind="Program">
<NuGetReference>Dapper</NuGetReference>
<NuGetReference>MiniExcel</NuGetReference>
<NuGetReference>System.Data.SqlClient</NuGetReference>
<Namespace>System.Globalization</Namespace>
<RemoveNamespace>System.Collections</RemoveNamespace>
<RemoveNamespace>System.Collections.Generic</RemoveNamespace>
<RemoveNamespace>System.Data</RemoveNamespace>
<RemoveNamespace>System.Linq</RemoveNamespace>
<RemoveNamespace>System.Linq.Expressions</RemoveNamespace>
<RemoveNamespace>System.Reflection</RemoveNamespace>
<RemoveNamespace>System.Text</RemoveNamespace>
<RemoveNamespace>System.Text.RegularExpressions</RemoveNamespace>
<RemoveNamespace>System.Threading</RemoveNamespace>
<RemoveNamespace>System.Transactions</RemoveNamespace>
<RemoveNamespace>System.Xml.Linq</RemoveNamespace>
<RemoveNamespace>System.Xml.XPath</RemoveNamespace>
</Query>
// Benchmark script: scans one worksheet XML part and reports the largest
// 0-based row and column index, together with memory usage and elapsed time.
// If a <dimension ref="..."/> element is encountered, its end reference is
// used and the scan stops; otherwise every <c r="..."/> cell is inspected.
void Main()
{
    Stopwatch sw = new Stopwatch();
    sw.Start();
    Console.WriteLine("start memory usage: " + System.Diagnostics.Process.GetCurrentProcess().WorkingSet64 / (1024 * 1024) + $"MB");

    int maxCellColumn = -1; // highest 0-based column index found so far
    int maxRowCount = -1;   // highest 0-based row index found so far
    var path = @"C:\Users\Wei\Downloads\Test1,000,000x10\xl\worksheets\sheet1.xml";

    using (var stream = File.OpenRead(path))
    using (var reader = XmlReader.Create(stream, XmlSettings))
    {
        while (reader.Read()) //3784ms
        {
            //<dimension ref="A1:J1000000"/>
            if (reader.IsStartElement("c")) //4246ms
            {
                // ParseReference returns false for a missing/invalid "r" attribute,
                // in which case the cell simply does not contribute to the maxima.
                if (ReferenceHelper.ParseReference(reader.GetAttribute("r"), out var column, out var row)) //5600ms
                {
                    // ParseReference is 1-based; the maxima are tracked 0-based.
                    maxRowCount = Math.Max(maxRowCount, row - 1);
                    maxCellColumn = Math.Max(maxCellColumn, column - 1); //5701ms
                }
            }
            else if (reader.IsStartElement("dimension")) //6159ms > 5999ms
            {
                var @ref = reader.GetAttribute("ref");
                if (string.IsNullOrEmpty(@ref))
                    throw new InvalidOperationException("Without sheet dimension data");

                // "A1:J1000000" -> end reference "J1000000". A single-cell dimension
                // such as "A1" has no ':' part, so take the last segment instead of
                // indexing [1], which would throw IndexOutOfRangeException.
                var rs = @ref.Split(':');
                var endReference = rs[rs.Length - 1];

                if (ReferenceHelper.ParseReference(endReference, out int cIndex, out int rIndex))
                {
                    // cIndex is the column and rIndex the row (both 1-based);
                    // the original script assigned them the wrong way round.
                    maxCellColumn = cIndex - 1;
                    maxRowCount = rIndex - 1;
                    break;
                }
                else
                    throw new InvalidOperationException("Invaild sheet dimension start data");
            }
        }

        Console.WriteLine($"maxRowCount : {maxRowCount} , maxCellColumn : {maxCellColumn}");
        Console.WriteLine("end memory usage: " + System.Diagnostics.Process.GetCurrentProcess().WorkingSet64 / (1024 * 1024) + $"MB & run time : {sw.ElapsedMilliseconds}ms");
    }
}
internal static class ReferenceHelper
{
    /// <summary>
    /// Parses an A1-style cell reference (for example <c>A15</c> or <c>AA10</c>)
    /// into its column and row numbers.
    /// </summary>
    /// <param name="value">
    /// The reference text: one or more upper-case letters 'A'-'Z' followed by
    /// decimal digits. May be null.
    /// </param>
    /// <param name="column">The column, 1-based (A = 1, Z = 26, AA = 27); 0 when parsing fails.</param>
    /// <param name="row">The row, 1-based; 0 when parsing fails.</param>
    /// <returns>
    /// <c>true</c> when <paramref name="value"/> was parsed completely;
    /// otherwise <c>false</c>, with both out parameters set to 0.
    /// </returns>
    public static bool ParseReference(string value, out int column, out int row)
    {
        column = 0;
        row = 0;

        if (string.IsNullOrEmpty(value))
            return false;

        const int offset = 'A' - 1;
        var position = 0;

        // Accumulate the leading letters as a bijective base-26 number.
        while (position < value.Length)
        {
            var c = value[position];
            if (c < 'A' || c > 'Z')
                break;

            column = column * 26 + (c - offset);
            position++;
        }

        // A valid reference needs at least one letter, and the first character
        // after the letters must be a digit.
        var hasLetters = position > 0;
        var digitFollows = position < value.Length && char.IsDigit(value[position]);

        // NumberStyles.None rejects signs, whitespace and separators, so the
        // remainder must consist of digits only (e.g. "A1B" is rejected).
        if (!hasLetters
            || !digitFollows
            || !int.TryParse(value.Substring(position), NumberStyles.None, CultureInfo.InvariantCulture, out row))
        {
            // Report every failure the same way; previously a failed row parse
            // returned false but left a non-zero column behind.
            column = 0;
            row = 0;
            return false;
        }

        return true;
    }
}
// You can define other methods, fields, classes and namespaces here
// Reader settings passed to XmlReader.Create in Main: comment nodes and
// insignificant whitespace are skipped while scanning the sheet XML, and no
// XmlResolver is supplied.
// NOTE(review): the null XmlResolver is presumably there to stop the reader
// from resolving external DTDs/entities — confirm against the
// XmlReaderSettings documentation.
private static readonly XmlReaderSettings XmlSettings = new XmlReaderSettings
{
IgnoreComments = true,
IgnoreWhitespace = true,
XmlResolver = null,
};

View File

@ -1,2 +1,222 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"><dimension ref="A1:K7"/><sheetViews><sheetView tabSelected="1" workbookViewId="0"><selection activeCell="E1" sqref="E1:E1048576"/></sheetView></sheetViews><sheetFormatPr defaultRowHeight="15"/><cols><col min="1" max="1" width="10.140625" bestFit="1" customWidth="1"/><col min="5" max="5" width="23.140625" style="7" bestFit="1" customWidth="1"/></cols><sheetData><row r="1" spans="1:11"><c r="A1" s="3"><v>39000</v></c><c r="B1" s="2"><v>1</v></c><c r="C1" s="2"><v>1.02</v></c><c r="D1" s="2" t="s"><v>1</v></c><c r="E1" s="6"><v>39814.458333333336</v></c><c r="F1" s="2"><v>6</v></c><c r="G1" s="2"><v>7</v></c><c r="H1" s="2"><v>8</v></c><c r="I1" s="2"><v>9</v></c><c r="J1" s="2"><v>10</v></c><c r="K1" s="4"><v>0.45833333333333331</v></c></row><row r="2" spans="1:11"><c r="A2" s="3"><v>39000</v></c><c r="B2" s="2"><v>2</v></c><c r="C2" s="2"><v>2.04</v></c><c r="D2" s="2" t="s"><v>2</v></c><c r="E2" s="6"><v>39814.458333333336</v></c><c r="F2" s="2"/><c r="G2" s="2"><v>7</v></c><c r="H2" s="2"><v>8</v></c><c r="I2" s="2"><v>9</v></c><c r="J2" s="2"><v>10</v></c><c r="K2" s="5"><v>0.46667824074074077</v></c></row><row r="3" spans="1:11"><c r="A3" s="3"><v>39000</v></c><c r="B3" s="2"><v>3</v></c><c r="C3" s="2"><v>4.08</v></c><c r="D3" s="2" t="s"><v>3</v></c><c r="E3" s="6"><v>39815.458333333336</v></c><c r="F3" s="2"/><c r="G3" s="2"/><c r="H3" s="2"><v>8</v></c><c r="I3" s="2"><v>9</v></c><c r="J3" s="2"><v>10</v></c><c r="K3" s="4"><v>0.4597222222222222</v></c></row><row r="4" spans="1:11"><c r="A4" s="3"><v>39001</v></c><c r="B4" s="2"><v>5</v></c><c r="C4" s="2"><v>8.16</v></c><c r="D4" s="2" t="s"><v>4</v></c><c r="E4" s="6"><v>39816.458333333336</v></c><c r="F4" s="2"/><c r="G4" s="2"/><c r="H4" s="2"/><c r="I4" s="2"><v>9</v></c><c r="J4" s="2"><v>10</v></c><c r="K4" 
s="4"><v>0.50138888888888899</v></c></row><row r="5" spans="1:11"><c r="A5" s="3"><v>39031</v></c><c r="B5" s="2"><v>8</v></c><c r="C5" s="2"><v>16.32</v></c><c r="D5" s="2"/><c r="E5" s="6"><v>39817.458333333336</v></c><c r="F5" s="2"/><c r="G5" s="2"/><c r="H5" s="2"/><c r="I5" s="2"/><c r="J5" s="2"><v>10</v></c><c r="K5" s="4"><v>0.54305555555555596</v></c></row><row r="6" spans="1:11"><c r="A6" s="3"><v>39000</v></c><c r="B6" s="2"><v>13</v></c><c r="C6" s="2"><v>32.64</v></c><c r="D6" s="2"/><c r="E6" s="6"><v>39818.458333333336</v></c><c r="F6" s="2"/><c r="G6" s="2"/><c r="H6" s="2"/><c r="I6" s="2"/><c r="J6" s="2"><v>10</v></c><c r="K6" s="4"><v>0.58472222222222203</v></c></row><row r="7" spans="1:11"><c r="A7" s="3"><v>39000</v></c><c r="B7" s="2"><v>21</v></c><c r="C7" s="2"><v>65.28</v></c><c r="D7" s="2"/><c r="E7" s="6"><v>39819.458333333336</v></c><c r="F7" s="2"/><c r="G7" s="2"/><c r="H7" s="2"/><c r="I7" s="2"/><c r="J7" s="2"><v>10</v></c><c r="K7" s="4"><v>0.62638888888888899</v></c></row></sheetData><pageMargins left="0.7" right="0.7" top="0.75" bottom="0.75" header="0.3" footer="0.3"/><pageSetup paperSize="9" orientation="portrait" r:id="rId1"/></worksheet>
<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main"
xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">
<dimension ref="A1:K7"/>
<sheetViews>
<sheetView tabSelected="1" workbookViewId="0">
<selection activeCell="E1" sqref="E1:E1048576"/>
</sheetView>
</sheetViews>
<sheetFormatPr defaultRowHeight="15"/>
<cols>
<col min="1" max="1" width="10.140625" bestFit="1" customWidth="1"/>
<col min="5" max="5" width="23.140625" style="7" bestFit="1" customWidth="1"/>
</cols>
<sheetData>
<row r="1" spans="1:11">
<c r="A1" s="3">
<v>39000</v>
</c>
<c r="B1" s="2">
<v>1</v>
</c>
<c r="C1" s="2">
<v>1.02</v>
</c>
<c r="D1" s="2" t="s">
<v>1</v>
</c>
<c r="E1" s="6">
<v>39814.458333333336</v>
</c>
<c r="F1" s="2">
<v>6</v>
</c>
<c r="G1" s="2">
<v>7</v>
</c>
<c r="H1" s="2">
<v>8</v>
</c>
<c r="I1" s="2">
<v>9</v>
</c>
<c r="J1" s="2">
<v>10</v>
</c>
<c r="K1" s="4">
<v>0.45833333333333331</v>
</c>
</row>
<row r="2" spans="1:11">
<c r="A2" s="3">
<v>39000</v>
</c>
<c r="B2" s="2">
<v>2</v>
</c>
<c r="C2" s="2">
<v>2.04</v>
</c>
<c r="D2" s="2" t="s">
<v>2</v>
</c>
<c r="E2" s="6">
<v>39814.458333333336</v>
</c>
<c r="F2" s="2"/>
<c r="G2" s="2">
<v>7</v>
</c>
<c r="H2" s="2">
<v>8</v>
</c>
<c r="I2" s="2">
<v>9</v>
</c>
<c r="J2" s="2">
<v>10</v>
</c>
<c r="K2" s="5">
<v>0.46667824074074077</v>
</c>
</row>
<row r="3" spans="1:11">
<c r="A3" s="3">
<v>39000</v>
</c>
<c r="B3" s="2">
<v>3</v>
</c>
<c r="C3" s="2">
<v>4.08</v>
</c>
<c r="D3" s="2" t="s">
<v>3</v>
</c>
<c r="E3" s="6">
<v>39815.458333333336</v>
</c>
<c r="F3" s="2"/>
<c r="G3" s="2"/>
<c r="H3" s="2">
<v>8</v>
</c>
<c r="I3" s="2">
<v>9</v>
</c>
<c r="J3" s="2">
<v>10</v>
</c>
<c r="K3" s="4">
<v>0.4597222222222222</v>
</c>
</row>
<row r="4" spans="1:11">
<c r="A4" s="3">
<v>39001</v>
</c>
<c r="B4" s="2">
<v>5</v>
</c>
<c r="C4" s="2">
<v>8.16</v>
</c>
<c r="D4" s="2" t="s">
<v>4</v>
</c>
<c r="E4" s="6">
<v>39816.458333333336</v>
</c>
<c r="F4" s="2"/>
<c r="G4" s="2"/>
<c r="H4" s="2"/>
<c r="I4" s="2">
<v>9</v>
</c>
<c r="J4" s="2">
<v>10</v>
</c>
<c r="K4" s="4">
<v>0.50138888888888899</v>
</c>
</row>
<row r="5" spans="1:11">
<c r="A5" s="3">
<v>39031</v>
</c>
<c r="B5" s="2">
<v>8</v>
</c>
<c r="C5" s="2">
<v>16.32</v>
</c>
<c r="D5" s="2"/>
<c r="E5" s="6">
<v>39817.458333333336</v>
</c>
<c r="F5" s="2"/>
<c r="G5" s="2"/>
<c r="H5" s="2"/>
<c r="I5" s="2"/>
<c r="J5" s="2">
<v>10</v>
</c>
<c r="K5" s="4">
<v>0.54305555555555596</v>
</c>
</row>
<row r="6" spans="1:11">
<c r="A6" s="3">
<v>39000</v>
</c>
<c r="B6" s="2">
<v>13</v>
</c>
<c r="C6" s="2">
<v>32.64</v>
</c>
<c r="D6" s="2"/>
<c r="E6" s="6">
<v>39818.458333333336</v>
</c>
<c r="F6" s="2"/>
<c r="G6" s="2"/>
<c r="H6" s="2"/>
<c r="I6" s="2"/>
<c r="J6" s="2">
<v>10</v>
</c>
<c r="K6" s="4">
<v>0.58472222222222203</v>
</c>
</row>
<row r="7" spans="1:11">
<c r="A7" s="3">
<v>39000</v>
</c>
<c r="B7" s="2">
<v>21</v>
</c>
<c r="C7" s="2">
<v>65.28</v>
</c>
<c r="D7" s="2"/>
<c r="E7" s="6">
<v>39819.458333333336</v>
</c>
<c r="F7" s="2"/>
<c r="G7" s="2"/>
<c r="H7" s="2"/>
<c r="I7" s="2"/>
<c r="J7" s="2">
<v>10</v>
</c>
<c r="K7" s="4">
<v>0.62638888888888899</v>
</c>
</row>
</sheetData>
<pageMargins left="0.7" right="0.7" top="0.75" bottom="0.75" header="0.3" footer="0.3"/>
<pageSetup paperSize="9" orientation="portrait" r:id="rId1"/>
</worksheet>

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

Binary file not shown.

View File

@ -134,10 +134,10 @@ namespace MiniExcelLibs.OpenXml
private List<SheetRecord> _sheetRecords = null;
internal void ReadWorkbookRels(ReadOnlyCollection<ZipArchiveEntry> entries)
{
_sheetRecords= ReadWorkbook(entries).ToList();
_sheetRecords = ReadWorkbook(entries).ToList();
//_styles = ReadStyle(entries).ToList();
using (var stream = entries.Single(w=> w.FullName == "xl/_rels/workbook.xml.rels").Open())
using (var stream = entries.Single(w => w.FullName == "xl/_rels/workbook.xml.rels").Open())
using (XmlReader reader = XmlReader.Create(stream, XmlSettings))
{
if (!reader.IsStartElement("Relationships", "http://schemas.openxmlformats.org/package/2006/relationships"))
@ -176,6 +176,8 @@ namespace MiniExcelLibs.OpenXml
private static Dictionary<int, string> _SharedStrings;
private const string ns = "http://schemas.openxmlformats.org/spreadsheetml/2006/main";
internal IEnumerable<IDictionary<string, object>> QueryImpl(Stream stream, bool UseHeaderRow = false)
{
using (ZipArchive archive = new ZipArchive(stream, ZipArchiveMode.Read, false, UTF8Encoding.UTF8))
@ -195,25 +197,38 @@ namespace MiniExcelLibs.OpenXml
else
firstSheetEntry = sheets.Single();
using (var firstSheetEntryStream = firstSheetEntry.Open())
{
using (XmlReader reader = XmlReader.Create(firstSheetEntryStream, XmlSettings))
{
const string ns = "http://schemas.openxmlformats.org/spreadsheetml/2006/main";
if (!reader.IsStartElement("worksheet", ns))
yield break;
if (!XmlReaderHelper.ReadFirstContent(reader))
yield break;
// TODO: need to optimize performance
var withoutCR = false;
var maxRowIndex = -1;
var maxColumnIndex = -1;
while (!reader.EOF)
using (var firstSheetEntryStream = firstSheetEntry.Open())
using (XmlReader reader = XmlReader.Create(firstSheetEntryStream, XmlSettings))
{
//TODO: will dimension after sheetData?
while (reader.Read())
{
if (reader.IsStartElement("c",ns))
{
var r = reader.GetAttribute("r");
if (r != null)
{
if (ReferenceHelper.ParseReference(r, out var column, out var row))
{
column = column - 1;
row = row - 1;
maxRowIndex = Math.Max(maxRowIndex, row);
maxColumnIndex = Math.Max(maxColumnIndex, column);
}
}
else
{
withoutCR = true;
break;
}
}
//this method logic depends on dimension to get maxcolumnIndex, if without dimension then it need to foreach all rows first time to get maxColumn and maxRowColumn
if (reader.IsStartElement("dimension", ns))
else if (reader.IsStartElement("dimension", ns))
{
var @ref = reader.GetAttribute("ref");
if (string.IsNullOrEmpty(@ref))
@ -223,17 +238,88 @@ namespace MiniExcelLibs.OpenXml
{
maxColumnIndex = cIndex - 1;
maxRowIndex = rIndex - 1;
break;
}
else
throw new InvalidOperationException("Invaild sheet dimension start data");
}
}
}
if (withoutCR)
{
using (var firstSheetEntryStream = firstSheetEntry.Open())
using (XmlReader reader = XmlReader.Create(firstSheetEntryStream, XmlSettings))
{
if (!reader.IsStartElement("worksheet", ns))
yield break;
if (!XmlReaderHelper.ReadFirstContent(reader))
yield break;
while (!reader.EOF)
{
if (reader.IsStartElement("sheetData", ns))
{
if (!XmlReaderHelper.ReadFirstContent(reader))
{
continue;
while (!reader.EOF)
{
if (reader.IsStartElement("row", ns))
{
maxRowIndex++;
if (!XmlReaderHelper.ReadFirstContent(reader))
continue;
//Cells
{
var cellIndex = -1;
while (!reader.EOF)
{
if (reader.IsStartElement("c", ns))
{
cellIndex++;
maxColumnIndex = Math.Max(maxColumnIndex, cellIndex);
}
if (!XmlReaderHelper.SkipContent(reader))
break;
}
}
}
else if (!XmlReaderHelper.SkipContent(reader))
{
break;
}
}
}
else if (!XmlReaderHelper.SkipContent(reader))
{
break;
}
}
}
}
using (var firstSheetEntryStream = firstSheetEntry.Open())
using (XmlReader reader = XmlReader.Create(firstSheetEntryStream, XmlSettings))
{
if (!reader.IsStartElement("worksheet", ns))
yield break;
if (!XmlReaderHelper.ReadFirstContent(reader))
yield break;
while (!reader.EOF)
{
if (reader.IsStartElement("sheetData", ns))
{
if (!XmlReaderHelper.ReadFirstContent(reader))
continue;
Dictionary<int, string> headRows = new Dictionary<int, string>();
int rowIndex = -1;
int nextRowIndex = 0;
@ -264,12 +350,12 @@ namespace MiniExcelLibs.OpenXml
// Set Cells
{
var cell = UseHeaderRow ? Helpers.GetEmptyExpandoObject(headRows) : Helpers.GetEmptyExpandoObject(maxColumnIndex);
var columnIndex = 0;
var columnIndex = withoutCR ? -1 : 0;
while (!reader.EOF)
{
if (reader.IsStartElement("c", ns))
{
var cellValue = ReadCell(reader, columnIndex, out var _columnIndex);
var cellValue = ReadCell(reader, columnIndex, withoutCR, out var _columnIndex);
columnIndex = _columnIndex;
//if not using First Head then using 1,2,3 as index
@ -308,17 +394,18 @@ namespace MiniExcelLibs.OpenXml
}
}
}
}
private object ReadCell(XmlReader reader, int nextColumnIndex, out int columnIndex)
private object ReadCell(XmlReader reader, int nextColumnIndex,bool withoutCR, out int columnIndex)
{
int xfIndex = -1;
var aS = reader.GetAttribute("s");
var aT = reader.GetAttribute("t");
var aR = reader.GetAttribute("r");
if(withoutCR)
columnIndex = nextColumnIndex + 1;
//TODO:need to check only need nextColumnIndex or columnIndex
if (ReferenceHelper.ParseReference(aR, out int referenceColumn, out _))
else if (ReferenceHelper.ParseReference(aR, out int referenceColumn, out _))
columnIndex = referenceColumn - 1; // ParseReference is 1-based
else
columnIndex = nextColumnIndex;
@ -357,7 +444,7 @@ namespace MiniExcelLibs.OpenXml
return value;
}
private void ConvertCellValue(string rawValue, string aT,int xfIndex, out object value)
private void ConvertCellValue(string rawValue, string aT, int xfIndex, out object value)
{
const NumberStyles style = NumberStyles.Any;
var invariantCulture = CultureInfo.InvariantCulture;

View File

@ -226,6 +226,31 @@ namespace MiniExcelLibs.Tests
}
}
// Queries the sample workbook TestWihoutRAttribute.xlsx and checks the rows
// that come back. Judging by the test and file names, the sample's <c> cells
// carry no "r" (cell reference) attribute — TODO confirm against the sample.
// Expected result: two rows, with five keys on the first row.
[Fact()]
public void QuerySheetWithoutRAttribute()
{
var path = @"..\..\..\..\..\samples\xlsx\TestWihoutRAttribute.xlsx";
using (var stream = File.OpenRead(path))
{
// Materialize the query so the rows can be indexed below.
var rows = stream.Query().ToList();
// The first row is viewed as a dictionary to count its keys.
var keys = (rows.First() as IDictionary<string, object>).Keys;
Assert.Equal(2, rows.Count());
Assert.Equal(5, keys.Count());
// First row: only column A is expected to carry a value; B is not asserted
// (its check is disabled below) and C, D, E are expected to be null.
Assert.Equal(1, rows[0].A);
//Assert.Equal(@""" <> +}{\nHello World]", (string)rows[0].B);
Assert.Equal(null, rows[0].C);
Assert.Equal(null, rows[0].D);
Assert.Equal(null, rows[0].E);
// Second row: a number, a string with special characters, a boolean and a
// date-like value that is compared as a string.
Assert.Equal(1, rows[1].A);
Assert.Equal("\"<>+}{\\nHello World", rows[1].B);
Assert.Equal(true, rows[1].C);
Assert.Equal("2021-03-16T19:10:21", rows[1].D);
}
}
//[Theory()]
//[InlineData(@"..\..\..\..\..\samples\xlsx\ExcelDataReaderCollections\TestOpen\TestOpen.xlsx")]
// public void QueryExcelDataReaderCheckTypeMappingTest(string path)