- [Breaking Changes] Remove QueryFirst/QueryFirstOrDefault/QuerySingle/QuerySingleOrDefault; users can call the equivalent LINQ methods on the result of Query() instead.

- Remove Query by path
This commit is contained in:
wei 2021-03-25 10:36:04 +08:00
parent 175e8a5234
commit ea2a5e50d6
7 changed files with 256 additions and 130 deletions

View File

@ -80,7 +80,7 @@ using (var stream = File.OpenRead(path))
```C#
using (var stream = File.OpenRead(path))
Assert.Equal("HelloWorld", stream.QueryFirst().A);
Assert.Equal("HelloWorld", stream.Query().First().A);
```
performance: MiniExcel/ExcelDataReader/ClosedXML/EPPlus

View File

@ -4,9 +4,9 @@
### 0.4.0
- Support create CSV by file path or stream
- Add MiniExcel.Query("file path")
- Support custom configuration setting
- Support specifying the Excel type automatically or manually
- [Breaking Changes] Remove QueryFirst/QueryFirstOrDefault/QuerySingle/QuerySingleOrDefault; users can call the equivalent LINQ methods on the result of Query() instead.
### 0.3.0
- Support SaveAs by IEnumerable of DapperRow and IDictionary<string,object>

View File

@ -10,55 +10,6 @@ namespace MiniExcelLibs.Csv
{
public class CsvReader
{
/// <summary>
/// Lazily reads the CSV file at <paramref name="path"/> line by line and yields one
/// dictionary-like row object per data line.
/// </summary>
/// <param name="path">Path of the CSV file; it is opened when enumeration starts.</param>
/// <param name="useHeaderRow">
/// When true, the first line supplies the keys for every later line and is not yielded itself.
/// When false, keys come from <c>Helpers.GetAlphabetColumnName</c> applied to the field index.
/// </param>
/// <param name="configuration">
/// Supplies the separator character and the stream-reader factory; when null,
/// <c>CsvConfiguration.GetDefaultConfiguration()</c> is used.
/// </param>
/// <returns>A deferred sequence of rows.</returns>
internal IEnumerable<IDictionary<string, object>> Query(string path, bool useHeaderRow, CsvConfiguration configuration)
{
    configuration = configuration ?? CsvConfiguration.GetDefaultConfiguration();

    using (var stream = File.OpenRead(path))
    // note: for the reason this code is duplicated, see issue #124
    using (var reader = configuration.GetStreamReaderFunc(stream))
    {
        var separators = new char[] { configuration.Seperator };
        var headers = new Dictionary<int, string>();
        var isFirstLine = true;
        string line;

        while ((line = reader.ReadLine()) != null)
        {
            var fields = line.Split(separators, StringSplitOptions.None);

            // No header row: every line is data, keyed by the generated column name.
            if (!useHeaderRow)
            {
                var record = Helpers.GetEmptyExpandoObject(fields.Length - 1);
                for (var index = 0; index < fields.Length; index++)
                {
                    record[Helpers.GetAlphabetColumnName(index)] = fields[index];
                }
                yield return record;
                continue;
            }

            // Header row: remember the column names by position and move on.
            if (isFirstLine)
            {
                isFirstLine = false;
                for (var index = 0; index < fields.Length; index++)
                {
                    headers.Add(index, fields[index]);
                }
                continue;
            }

            // Data line in header mode: key each field by the header at the same position.
            var namedRecord = Helpers.GetEmptyExpandoObject(headers);
            for (var index = 0; index < fields.Length; index++)
            {
                namedRecord[headers[index]] = fields[index];
            }
            yield return namedRecord;
        }
    }
}
internal IEnumerable<IDictionary<string, object>> Query(Stream stream, bool useHeaderRow, CsvConfiguration configuration)
{
if (configuration == null)

View File

@ -41,58 +41,11 @@
}
}
/// <summary>
/// Opens the file at <paramref name="path"/> and yields its rows mapped to
/// <typeparamref name="T"/> via <c>QueryImpl</c>.
/// </summary>
/// <typeparam name="T">Row type; must be a class with a public parameterless constructor.</typeparam>
/// <param name="path">Path of the file to read.</param>
/// <returns>
/// A deferred sequence. The file is opened when enumeration starts and closed when
/// enumeration finishes or the enumerator is disposed.
/// </returns>
public static IEnumerable<T> Query<T>(string path) where T : class, new()
{
    using (var stream = File.OpenRead(path))
    {
        // Enumerate inside the using block: returning the sequence directly would dispose
        // the stream before a deferred QueryImpl sequence had read anything from it.
        foreach (var item in QueryImpl<T>(stream))
        {
            yield return item;
        }
    }
}
/// <summary>
/// Maps the rows read from <paramref name="stream"/> to <typeparamref name="T"/> by
/// delegating to <c>QueryImpl</c>.
/// </summary>
/// <typeparam name="T">Row type; must be a class with a public parameterless constructor.</typeparam>
/// <param name="stream">Source stream; it is not disposed by this method.</param>
public static IEnumerable<T> Query<T>(this Stream stream) where T : class, new()
    => QueryImpl<T>(stream);
/// <summary>
/// Applies <c>First()</c> to the rows that <c>QueryImpl</c> produces from <paramref name="stream"/>.
/// </summary>
/// <typeparam name="T">Row type; must be a class with a public parameterless constructor.</typeparam>
public static T QueryFirst<T>(this Stream stream) where T : class, new()
    => QueryImpl<T>(stream).First();
/// <summary>
/// Applies <c>FirstOrDefault()</c> to the rows that <c>QueryImpl</c> produces from
/// <paramref name="stream"/>.
/// </summary>
/// <typeparam name="T">Row type; must be a class with a public parameterless constructor.</typeparam>
public static T QueryFirstOrDefault<T>(this Stream stream) where T : class, new()
    => QueryImpl<T>(stream).FirstOrDefault();
/// <summary>
/// Applies <c>Single()</c> to the rows that <c>QueryImpl</c> produces from <paramref name="stream"/>.
/// </summary>
/// <typeparam name="T">Row type; must be a class with a public parameterless constructor.</typeparam>
public static T QuerySingle<T>(this Stream stream) where T : class, new()
    => QueryImpl<T>(stream).Single();
/// <summary>
/// Applies <c>SingleOrDefault()</c> to the rows that <c>QueryImpl</c> produces from
/// <paramref name="stream"/>.
/// </summary>
/// <typeparam name="T">Row type; must be a class with a public parameterless constructor.</typeparam>
public static T QuerySingleOrDefault<T>(this Stream stream) where T : class, new()
    => QueryImpl<T>(stream).SingleOrDefault();
/// <summary>
/// Queries the file at <paramref name="path"/> and returns its rows as dynamic objects.
/// </summary>
/// <param name="path">Path of the file to read.</param>
/// <param name="useHeaderRow">Forwarded unchanged to the format-specific reader.</param>
/// <param name="excelType">
/// File format. When <c>ExcelType.UNKNOWN</c>, the format is resolved with <c>GetExcelType(path)</c>.
/// </param>
/// <param name="configuration">
/// Optional settings. For CSV it is cast to <c>CsvConfiguration</c>; the XLSX reader does not receive it.
/// </param>
/// <returns>The row sequence produced by the CSV or XLSX reader.</returns>
/// <exception cref="NotSupportedException">The resolved type is neither CSV nor XLSX.</exception>
public static IEnumerable<dynamic> Query(string path, bool useHeaderRow = false, ExcelType excelType = ExcelType.UNKNOWN,IConfiguration configuration=null)
{
    if (excelType == ExcelType.UNKNOWN)
        excelType = GetExcelType(path);

    switch (excelType)
    {
        case ExcelType.CSV:
            return new CsvReader().Query(path, useHeaderRow, (CsvConfiguration)configuration);
        case ExcelType.XLSX:
            // Use the path-based overload so the reader opens and disposes the file itself.
            // Previously a stream that was always null was handed to the stream overload.
            return new ExcelOpenXmlSheetReader().Query(path, useHeaderRow);
        default:
            throw new NotSupportedException($"Extension : {Path.GetExtension(path)} not suppprt");
    }
}
public static IEnumerable<dynamic> Query(this Stream stream, bool useHeaderRow = false, ExcelType excelType = ExcelType.UNKNOWN, IConfiguration configuration = null)
{
if (excelType == ExcelType.UNKNOWN)
@ -107,25 +60,5 @@
throw new NotSupportedException($"Please Issue for me");
}
}
/// <summary>
/// Reads <paramref name="stream"/> with <c>ExcelOpenXmlSheetReader</c> and applies <c>First()</c>
/// to the resulting rows.
/// </summary>
/// <param name="stream">Source stream, always read by the xlsx reader.</param>
/// <param name="useHeaderRow">Forwarded unchanged to the reader.</param>
public static dynamic QueryFirst(this Stream stream, bool useHeaderRow = false)
{
    var rows = new ExcelOpenXmlSheetReader().Query(stream, useHeaderRow);
    return rows.First();
}
/// <summary>
/// Reads <paramref name="stream"/> with <c>ExcelOpenXmlSheetReader</c> and applies
/// <c>FirstOrDefault()</c> to the resulting rows.
/// </summary>
/// <param name="stream">Source stream, always read by the xlsx reader.</param>
/// <param name="useHeaderRow">Forwarded unchanged to the reader.</param>
public static dynamic QueryFirstOrDefault(this Stream stream, bool useHeaderRow = false)
{
    var rows = new ExcelOpenXmlSheetReader().Query(stream, useHeaderRow);
    return rows.FirstOrDefault();
}
/// <summary>
/// Reads <paramref name="stream"/> with <c>ExcelOpenXmlSheetReader</c> and applies <c>Single()</c>
/// to the resulting rows.
/// </summary>
/// <param name="stream">Source stream, always read by the xlsx reader.</param>
/// <param name="useHeaderRow">Forwarded unchanged to the reader.</param>
public static dynamic QuerySingle(this Stream stream, bool useHeaderRow = false)
{
    var rows = new ExcelOpenXmlSheetReader().Query(stream, useHeaderRow);
    return rows.Single();
}
/// <summary>
/// Reads <paramref name="stream"/> with <c>ExcelOpenXmlSheetReader</c> and applies
/// <c>SingleOrDefault()</c> to the resulting rows.
/// </summary>
/// <param name="stream">Source stream, always read by the xlsx reader.</param>
/// <param name="useHeaderRow">Forwarded unchanged to the reader.</param>
public static dynamic QuerySingleOrDefault(this Stream stream, bool useHeaderRow = false)
{
    var rows = new ExcelOpenXmlSheetReader().Query(stream, useHeaderRow);
    return rows.SingleOrDefault();
}
}
}

View File

@ -171,6 +171,258 @@ namespace MiniExcelLibs.OpenXml
}
}
/// <summary>
/// Lazily reads the first worksheet of the xlsx file at <paramref name="path"/> and yields
/// one dictionary-like object per row.
/// </summary>
/// <remarks>
/// The sheet entry is opened up to three times: once to find the maximum row/column index
/// (from the <c>dimension</c> element or from cell references), once more to count rows and
/// cells when a cell without an <c>r</c> attribute is found, and finally to produce the rows.
/// </remarks>
/// <param name="path">Path of the xlsx file; it is opened when enumeration starts.</param>
/// <param name="UseHeaderRow">
/// When true, the cell values of row index 0 become the keys of later rows and that row is
/// not yielded. When false, keys come from <c>Helpers.GetAlphabetColumnName</c>.
/// </param>
/// <returns>A deferred sequence of rows; gaps between row numbers are filled with empty rows.</returns>
/// <exception cref="InvalidOperationException">
/// The sheet's <c>dimension</c> element has an empty or unparsable <c>ref</c> attribute.
/// </exception>
internal IEnumerable<IDictionary<string, object>> Query(string path, bool UseHeaderRow = false)
{
    using (var stream = File.OpenRead(path))
    using (var archive = new ExcelOpenXmlZip(stream))
    {
        _SharedStrings = GetSharedStrings(archive);

        // When the archive holds more than one sheet, xl/_rels/workbook.xml.rels is read
        // (presumably populating _sheetRecords) to decide which entry is the first sheet.
        var sheets = archive.Entries.Where(w => w.FullName.StartsWith("xl/worksheets/sheet", StringComparison.OrdinalIgnoreCase)
            || w.FullName.StartsWith("/xl/worksheets/sheet", StringComparison.OrdinalIgnoreCase)
        );
        ZipArchiveEntry firstSheetEntry = null;
        if (sheets.Count() > 1)
        {
            ReadWorkbookRels(archive.Entries);
            firstSheetEntry = sheets.Single(w => w.FullName == $"xl/{_sheetRecords[0].Path}" || w.FullName == $"/xl/{_sheetRecords[0].Path}");
        }
        else
            firstSheetEntry = sheets.Single();

        // TODO: need to optimize performance
        var withoutCR = false; // set when a cell has no "r" (cell reference) attribute
        var maxRowIndex = -1;
        var maxColumnIndex = -1;

        // Pass 1: determine the zero-based maximum row/column index.
        //TODO: merge one open read
        using (var firstSheetEntryStream = firstSheetEntry.Open())
        using (XmlReader reader = XmlReader.Create(firstSheetEntryStream, XmlSettings))
        {
            while (reader.Read())
            {
                if (reader.IsStartElement("c", ns))
                {
                    var r = reader.GetAttribute("r");
                    if (r != null)
                    {
                        if (ReferenceHelper.ParseReference(r, out var column, out var row))
                        {
                            column = column - 1;
                            row = row - 1;
                            maxRowIndex = Math.Max(maxRowIndex, row);
                            maxColumnIndex = Math.Max(maxColumnIndex, column);
                        }
                    }
                    else
                    {
                        // Cells carry no reference: fall back to counting in pass 2.
                        withoutCR = true;
                        break;
                    }
                }
                // This method relies on the dimension element for maxColumnIndex; without it,
                // every cell has to be visited once to find the maximum column and row.
                else if (reader.IsStartElement("dimension", ns))
                {
                    var @ref = reader.GetAttribute("ref");
                    if (string.IsNullOrEmpty(@ref))
                        throw new InvalidOperationException("Without sheet dimension data");
                    var rs = @ref.Split(':');
                    // issue : https://github.com/shps951023/MiniExcel/issues/102
                    // A ref may be a single cell ("A1") rather than a range ("A1:C3").
                    if (ReferenceHelper.ParseReference(rs.Length == 2 ? rs[1] : rs[0], out int cIndex, out int rIndex))
                    {
                        maxColumnIndex = cIndex - 1;
                        maxRowIndex = rIndex - 1;
                        break;
                    }
                    else
                        throw new InvalidOperationException("Invaild sheet dimension start data");
                }
            }
        }

        // Pass 2 (only when cells have no reference): count rows and cells per row.
        if (withoutCR)
        {
            using (var firstSheetEntryStream = firstSheetEntry.Open())
            using (XmlReader reader = XmlReader.Create(firstSheetEntryStream, XmlSettings))
            {
                if (!reader.IsStartElement("worksheet", ns))
                    yield break;
                if (!XmlReaderHelper.ReadFirstContent(reader))
                    yield break;
                while (!reader.EOF)
                {
                    if (reader.IsStartElement("sheetData", ns))
                    {
                        if (!XmlReaderHelper.ReadFirstContent(reader))
                            continue;
                        while (!reader.EOF)
                        {
                            if (reader.IsStartElement("row", ns))
                            {
                                maxRowIndex++;
                                if (!XmlReaderHelper.ReadFirstContent(reader))
                                    continue;
                                //Cells
                                {
                                    var cellIndex = -1;
                                    while (!reader.EOF)
                                    {
                                        if (reader.IsStartElement("c", ns))
                                        {
                                            cellIndex++;
                                            maxColumnIndex = Math.Max(maxColumnIndex, cellIndex);
                                        }
                                        if (!XmlReaderHelper.SkipContent(reader))
                                            break;
                                    }
                                }
                            }
                            else if (!XmlReaderHelper.SkipContent(reader))
                            {
                                break;
                            }
                        }
                    }
                    else if (!XmlReaderHelper.SkipContent(reader))
                    {
                        break;
                    }
                }
            }
        }

        // Pass 3: read the cells and yield the rows.
        using (var firstSheetEntryStream = firstSheetEntry.Open())
        using (XmlReader reader = XmlReader.Create(firstSheetEntryStream, XmlSettings))
        {
            if (!reader.IsStartElement("worksheet", ns))
                yield break;
            if (!XmlReaderHelper.ReadFirstContent(reader))
                yield break;
            while (!reader.EOF)
            {
                if (reader.IsStartElement("sheetData", ns))
                {
                    if (!XmlReaderHelper.ReadFirstContent(reader))
                        continue;
                    Dictionary<int, string> headRows = new Dictionary<int, string>();
                    int rowIndex = -1;
                    int nextRowIndex = 0;
                    while (!reader.EOF)
                    {
                        if (reader.IsStartElement("row", ns))
                        {
                            nextRowIndex = rowIndex + 1;
                            if (int.TryParse(reader.GetAttribute("r"), out int arValue))
                                rowIndex = arValue - 1; // The row attribute is 1-based
                            else
                                rowIndex++;
                            if (!XmlReaderHelper.ReadFirstContent(reader))
                                continue;

                            // fill empty rows
                            {
                                if (nextRowIndex < rowIndex)
                                {
                                    for (int i = nextRowIndex; i < rowIndex; i++)
                                    {
                                        if (UseHeaderRow)
                                            yield return Helpers.GetEmptyExpandoObject(headRows);
                                        else
                                            yield return Helpers.GetEmptyExpandoObject(maxColumnIndex);
                                    }
                                }
                            }

                            // Set Cells
                            {
                                var cell = UseHeaderRow ? Helpers.GetEmptyExpandoObject(headRows) : Helpers.GetEmptyExpandoObject(maxColumnIndex);
                                var columnIndex = withoutCR ? -1 : 0;
                                while (!reader.EOF)
                                {
                                    if (reader.IsStartElement("c", ns))
                                    {
                                        var aS = reader.GetAttribute("s");
                                        var cellValue = ReadCell(reader, columnIndex, withoutCR, out var _columnIndex);
                                        columnIndex = _columnIndex;

                                        // xfindex
                                        if (!string.IsNullOrEmpty(aS))
                                        {
                                            // An unparsable style attribute leaves xfIndex at -1.
                                            int xfIndex = -1;
                                            if (int.TryParse(aS, NumberStyles.Any, CultureInfo.InvariantCulture, out var styleIndex))
                                            {
                                                xfIndex = styleIndex;
                                            }
                                            // only when have s attribute then load styles xml data
                                            if (_style == null)
                                                _style = new ExcelOpenXmlStyles(archive);

                                            // Without a header row, generated column names are used as keys.
                                            if (UseHeaderRow)
                                            {
                                                if (rowIndex == 0)
                                                    headRows.Add(columnIndex, _style.ConvertValueByStyleFormat(xfIndex, cellValue).ToString());
                                                else
                                                    cell[headRows[columnIndex]] = _style.ConvertValueByStyleFormat(xfIndex, cellValue);
                                            }
                                            else
                                                cell[Helpers.GetAlphabetColumnName(columnIndex)] = _style.ConvertValueByStyleFormat(xfIndex, cellValue);
                                        }
                                        else
                                        {
                                            // Without a header row, generated column names are used as keys.
                                            if (UseHeaderRow)
                                            {
                                                if (rowIndex == 0)
                                                    headRows.Add(columnIndex, cellValue.ToString());
                                                else
                                                    cell[headRows[columnIndex]] = cellValue;
                                            }
                                            else
                                                cell[Helpers.GetAlphabetColumnName(columnIndex)] = cellValue;
                                        }
                                    }
                                    else if (!XmlReaderHelper.SkipContent(reader))
                                        break;
                                }

                                // The header row only feeds headRows; it is not a data row.
                                if (UseHeaderRow && rowIndex == 0)
                                    continue;

                                yield return cell;
                            }
                        }
                        else if (!XmlReaderHelper.SkipContent(reader))
                        {
                            break;
                        }
                    }
                }
                else if (!XmlReaderHelper.SkipContent(reader))
                {
                    break;
                }
            }
        }
    }
}
internal IEnumerable<IDictionary<string, object>> Query(Stream stream, bool UseHeaderRow = false)
{
using (var archive = new ExcelOpenXmlZip(stream))

View File

@ -26,14 +26,6 @@ namespace MiniExcelLibs.Tests
Assert.Equal("A2", rows[1].c1);
Assert.Equal("B2", rows[1].c2);
}
{
var rows = MiniExcel.Query(path,useHeaderRow: true).ToList();
Assert.Equal("A1", rows[0].c1);
Assert.Equal("B1", rows[0].c2);
Assert.Equal("A2", rows[1].c1);
Assert.Equal("B2", rows[1].c2);
}
}
[Fact()]

View File

@ -121,8 +121,6 @@ namespace MiniExcelLibs.Tests
}
}
public class DemoPocoHelloWorld
{
public string HelloWorld { get; set; }
@ -184,7 +182,7 @@ namespace MiniExcelLibs.Tests
var path = @"..\..\..\..\..\samples\xlsx\TestDatetimeSpanFormat_ClosedXml.xlsx";
using (var stream = FileHelper.OpenRead(path))
{
var row = stream.QueryFirst();
var row = stream.Query().First();
var a = row.A;
var b = row.B;
Assert.Equal(DateTime.Parse("2021-03-20T23:39:42.3130000"), (DateTime)a);
@ -447,7 +445,7 @@ namespace MiniExcelLibs.Tests
{
var path = @"..\..\..\..\..\samples\xlsx\Test1,000,000x10\Test1,000,000x10.xlsx";
using (var stream = File.OpenRead(path))
Assert.Equal("HelloWorld", stream.QueryFirst().A);
Assert.Equal("HelloWorld", stream.Query().First().A);
}
[Fact()]