mirror of
https://gitee.com/dotnetchina/MiniExcel.git
synced 2024-11-29 18:38:08 +08:00
feat: cache sharedstrings in sqlite
This commit is contained in:
parent
bbe29ae1b3
commit
65ad0f7e4c
175
src/MiniExcel/DbList.cs
Normal file
175
src/MiniExcel/DbList.cs
Normal file
@ -0,0 +1,175 @@
|
|||||||
|
using System;
|
||||||
|
using System.Collections;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Data.SQLite;
|
||||||
|
using System.IO;
|
||||||
|
using System.Text;
|
||||||
|
|
||||||
|
namespace MiniExcelLibs
|
||||||
|
{
|
||||||
|
/// <summary>
/// An <see cref="IList{T}"/> of strings backed by a SQLite database file named
/// <c>{name}.db</c>, so that large string tables do not have to be held in memory.
/// </summary>
/// <remarks>
/// Each element is a row <c>(name, index)</c> in the <c>sharedStrings</c> table, where
/// <c>index</c> is the zero-based list position. All mutating operations keep the
/// positions contiguous (0..Count-1). The database file is deleted on <see cref="Dispose"/>.
/// A <c>null</c> string is stored as the empty string.
/// The class shares one connection and one command object and performs no locking of its own.
/// </remarks>
public class DbList : IList<string>, IDisposable
{
    private SQLiteConnection _conn;
    private SQLiteCommand _cmd;
    private string _name;
    private const string _tableName = "sharedStrings";

    /// <summary>
    /// Opens (creating if necessary) the database file <c>{name}.db</c> and (re)creates an
    /// empty backing table in it.
    /// </summary>
    /// <param name="name">Path of the database file without the <c>.db</c> extension.</param>
    public DbList(string name)
    {
        _name = name;
        _conn = new SQLiteConnection($"Data Source={name}.db;Version=3;");
        try
        {
            _conn.Open();
            _cmd = _conn.CreateCommand();
            CreateTable();
        }
        catch
        {
            // Do not leak the connection or the half-created file when setup fails.
            Dispose();
            throw;
        }
    }

    /// <summary>
    /// Points the shared command at <paramref name="sql"/> and removes parameters left over
    /// from the previous statement.
    /// </summary>
    /// <exception cref="ObjectDisposedException">The list has been disposed.</exception>
    private SQLiteCommand Prepare(string sql)
    {
        if (_cmd == null)
        {
            throw new ObjectDisposedException(nameof(DbList));
        }

        _cmd.CommandText = sql;
        _cmd.Parameters.Clear();
        return _cmd;
    }

    /// <summary>Drops any existing table and creates the table plus its two indexes.</summary>
    private void CreateTable()
    {
        Prepare($"DROP TABLE IF EXISTS {_tableName}").ExecuteNonQuery();
        Prepare($@"
CREATE TABLE {_tableName} (name TEXT, `index` INTEGER);

CREATE UNIQUE INDEX idx_index
ON {_tableName} (
    `index`
);

CREATE INDEX idx_name
ON {_tableName} (
    name
);").ExecuteNonQuery();
    }

    /// <summary>Enumerates the stored strings in list order.</summary>
    /// <remarks>
    /// Uses its own command object, so other members of this list may be called while an
    /// enumeration is in progress.
    /// </remarks>
    public IEnumerator<string> GetEnumerator()
    {
        using (var query = _conn.CreateCommand())
        {
            query.CommandText = $"SELECT name FROM {_tableName} ORDER BY `index`";
            using (var reader = query.ExecuteReader())
            {
                while (reader.Read())
                {
                    yield return reader.IsDBNull(0) ? null : reader.GetString(0);
                }
            }
        }
    }

    IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }

    /// <summary>Appends <paramref name="item"/> at the end of the list.</summary>
    public void Add(string item)
    {
        var nextIndex = GetMaxIndex() + 1;

        // Parameters instead of string interpolation: values containing a quote character
        // would otherwise break (or inject into) the statement.
        var cmd = Prepare($"INSERT INTO {_tableName}(name, `index`) VALUES (@name, @index)");
        cmd.Parameters.AddWithValue("@name", item ?? string.Empty);
        cmd.Parameters.AddWithValue("@index", nextIndex);
        cmd.ExecuteNonQuery();
    }

    /// <summary>Returns the highest stored position, or -1 when the list is empty.</summary>
    private long GetMaxIndex()
    {
        var result = Prepare($"SELECT MAX(`index`) FROM {_tableName}").ExecuteScalar();

        // MAX over an empty table yields a single NULL row.
        if (result == null || result == DBNull.Value)
        {
            return -1;
        }

        return Convert.ToInt64(result);
    }

    /// <summary>Removes every element; the list stays usable afterwards.</summary>
    public void Clear()
    {
        // Delete the rows rather than dropping the table, so later calls still find it.
        Prepare($"DELETE FROM {_tableName}").ExecuteNonQuery();
    }

    /// <summary>Returns whether <paramref name="item"/> is present in the list.</summary>
    public bool Contains(string item)
    {
        var cmd = Prepare($"SELECT 1 FROM {_tableName} WHERE name = @name LIMIT 1");
        cmd.Parameters.AddWithValue("@name", item ?? string.Empty);
        return cmd.ExecuteScalar() != null;
    }

    /// <summary>
    /// Copies all elements, in list order, into <paramref name="array"/> starting at
    /// <paramref name="arrayIndex"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="array"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="arrayIndex"/> is negative.</exception>
    /// <exception cref="ArgumentException">The destination does not have enough room.</exception>
    public void CopyTo(string[] array, int arrayIndex)
    {
        if (array == null)
        {
            throw new ArgumentNullException(nameof(array));
        }

        if (arrayIndex < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(arrayIndex));
        }

        if (array.Length - arrayIndex < Count)
        {
            throw new ArgumentException("Destination array is not long enough.", nameof(array));
        }

        foreach (var value in this)
        {
            array[arrayIndex++] = value;
        }
    }

    /// <summary>Appends all strings of <paramref name="array"/> in order, inside one transaction.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="array"/> is null.</exception>
    public void AddRange(List<string> array)
    {
        if (array == null)
        {
            throw new ArgumentNullException(nameof(array));
        }

        if (array.Count == 0)
        {
            return;
        }

        var nextIndex = GetMaxIndex() + 1;

        // One parameterized single-row INSERT executed repeatedly inside a transaction:
        // no escaping problems and no dependence on SQLite's per-statement size limits.
        using (var transaction = _conn.BeginTransaction())
        using (var insert = _conn.CreateCommand())
        {
            insert.Transaction = transaction;
            insert.CommandText = $"INSERT INTO {_tableName}(name, `index`) VALUES (@name, @index)";
            var nameParam = insert.Parameters.AddWithValue("@name", string.Empty);
            var indexParam = insert.Parameters.AddWithValue("@index", nextIndex);

            foreach (var item in array)
            {
                nameParam.Value = item ?? string.Empty;
                indexParam.Value = nextIndex++;
                insert.ExecuteNonQuery();
            }

            transaction.Commit();
        }
    }

    /// <summary>
    /// Removes the first occurrence of <paramref name="item"/> and closes the gap it leaves.
    /// </summary>
    /// <returns><c>true</c> when an element was removed; otherwise <c>false</c>.</returns>
    public bool Remove(string item)
    {
        var position = IndexOf(item);
        if (position < 0)
        {
            return false;
        }

        RemoveAt(position);
        return true;
    }

    /// <summary>Number of elements currently stored.</summary>
    public int Count
    {
        get
        {
            return Convert.ToInt32(Prepare("SELECT COUNT(*) FROM " + _tableName).ExecuteScalar());
        }
    }

    /// <summary>Always <c>false</c>: the list can be modified.</summary>
    public bool IsReadOnly { get; }

    /// <summary>
    /// Returns the position of the first occurrence of <paramref name="item"/>, or -1 when
    /// it is not in the list.
    /// </summary>
    public int IndexOf(string item)
    {
        var cmd = Prepare($"SELECT `index` FROM {_tableName} WHERE name = @name ORDER BY `index` LIMIT 1");
        cmd.Parameters.AddWithValue("@name", item ?? string.Empty);
        var result = cmd.ExecuteScalar();

        if (result == null || result == DBNull.Value)
        {
            return -1;
        }

        // SQLite INTEGER columns come back as boxed Int64; a direct (int) unbox would throw.
        return Convert.ToInt32(result);
    }

    /// <summary>
    /// Inserts <paramref name="item"/> at <paramref name="index"/>, moving later elements up by one.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="index"/> is negative or greater than <see cref="Count"/>.
    /// </exception>
    public void Insert(int index, string item)
    {
        if (index < 0 || index > Count)
        {
            throw new ArgumentOutOfRangeException(nameof(index));
        }

        // Statements issued on this connection take part in the open transaction.
        using (var transaction = _conn.BeginTransaction())
        {
            // Shift in two steps through negative values: the UNIQUE index is checked row
            // by row, so a direct "index + 1" could collide with a row not yet moved.
            var cmd = Prepare($"UPDATE {_tableName} SET `index` = -`index` - 1 WHERE `index` >= @index");
            cmd.Parameters.AddWithValue("@index", index);
            cmd.ExecuteNonQuery();

            Prepare($"UPDATE {_tableName} SET `index` = -`index` WHERE `index` < 0").ExecuteNonQuery();

            cmd = Prepare($"INSERT INTO {_tableName}(name, `index`) VALUES (@name, @index)");
            cmd.Parameters.AddWithValue("@name", item ?? string.Empty);
            cmd.Parameters.AddWithValue("@index", index);
            cmd.ExecuteNonQuery();

            transaction.Commit();
        }
    }

    /// <summary>
    /// Removes the element at <paramref name="index"/> and moves later elements down by one.
    /// Does nothing when no element has that position.
    /// </summary>
    public void RemoveAt(int index)
    {
        using (var transaction = _conn.BeginTransaction())
        {
            var cmd = Prepare($"DELETE FROM {_tableName} WHERE `index` = @index");
            cmd.Parameters.AddWithValue("@index", index);

            if (cmd.ExecuteNonQuery() > 0)
            {
                // Same two-step shift as Insert, in the opposite direction (k -> -k -> k - 1).
                cmd = Prepare($"UPDATE {_tableName} SET `index` = -`index` WHERE `index` > @index");
                cmd.Parameters.AddWithValue("@index", index);
                cmd.ExecuteNonQuery();

                Prepare($"UPDATE {_tableName} SET `index` = -`index` - 1 WHERE `index` < 0").ExecuteNonQuery();
            }

            transaction.Commit();
        }
    }

    /// <summary>
    /// Gets or sets the string at position <paramref name="index"/>. The getter returns
    /// <c>null</c>, and the setter does nothing, when no element has that position.
    /// </summary>
    public string this[int index]
    {
        get
        {
            var cmd = Prepare($"SELECT name FROM {_tableName} WHERE `index` = @index");
            cmd.Parameters.AddWithValue("@index", index);
            return cmd.ExecuteScalar() as string;
        }
        set
        {
            var cmd = Prepare($"UPDATE {_tableName} SET name = @name WHERE `index` = @index");
            cmd.Parameters.AddWithValue("@name", value ?? string.Empty);
            cmd.Parameters.AddWithValue("@index", index);
            cmd.ExecuteNonQuery();
        }
    }

    /// <summary>
    /// Releases the command and connection and deletes the database file. Safe to call more
    /// than once.
    /// </summary>
    public void Dispose()
    {
        if (_cmd != null)
        {
            _cmd.Dispose();
            _cmd = null;
        }

        if (_conn != null)
        {
            _conn.Dispose();
            _conn = null;
        }

        try
        {
            File.Delete($"{_name}.db");
        }
        catch (IOException)
        {
            // Best-effort cleanup of the temporary file: Dispose must not throw, and the
            // file can still be held open briefly after the connection is released.
        }
        catch (UnauthorizedAccessException)
        {
            // See above: cleanup is best-effort.
        }
    }
}
|
||||||
|
}
|
@ -44,5 +44,6 @@ Todo : https://github.com/shps951023/MiniExcel/projects/1?fullscreen=true
|
|||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<PackageReference Include="ExcelNumberFormat" Version="1.1.0" />
|
<PackageReference Include="ExcelNumberFormat" Version="1.1.0" />
|
||||||
|
<PackageReference Include="System.Data.SQLite.Core" Version="1.0.115.5" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
</Project>
|
</Project>
|
||||||
|
@ -17,7 +17,7 @@ namespace MiniExcelLibs.OpenXml
|
|||||||
private static readonly string[] _ns = { Config.SpreadsheetmlXmlns, Config.SpreadsheetmlXmlStrictns };
|
private static readonly string[] _ns = { Config.SpreadsheetmlXmlns, Config.SpreadsheetmlXmlStrictns };
|
||||||
private static readonly string[] _relationshiopNs = { Config.SpreadsheetmlXmlRelationshipns, Config.SpreadsheetmlXmlStrictRelationshipns };
|
private static readonly string[] _relationshiopNs = { Config.SpreadsheetmlXmlRelationshipns, Config.SpreadsheetmlXmlStrictRelationshipns };
|
||||||
private List<SheetRecord> _sheetRecords;
|
private List<SheetRecord> _sheetRecords;
|
||||||
private List<string> _sharedStrings;
|
private IList<string> _sharedStrings;
|
||||||
private MergeCells _mergeCells;
|
private MergeCells _mergeCells;
|
||||||
private ExcelOpenXmlStyles _style;
|
private ExcelOpenXmlStyles _style;
|
||||||
private readonly ExcelOpenXmlZip _archive;
|
private readonly ExcelOpenXmlZip _archive;
|
||||||
@ -490,42 +490,44 @@ namespace MiniExcelLibs.OpenXml
|
|||||||
return;
|
return;
|
||||||
using (var stream = sharedStringsEntry.Open())
|
using (var stream = sharedStringsEntry.Open())
|
||||||
{
|
{
|
||||||
_sharedStrings = GetSharedStrings(stream).ToList();
|
if (_config.EnableSharedStringCache && sharedStringsEntry.Length >= _config.SharedStringCacheSize)
|
||||||
}
|
{
|
||||||
|
// use sqlite
|
||||||
|
var dbList = new DbList(Guid.NewGuid().ToString());
|
||||||
|
|
||||||
|
var list = new List<string>();
|
||||||
|
foreach (var sharedString in XmlReaderHelper.GetSharedStrings(stream, _ns))
|
||||||
|
{
|
||||||
|
list.Add(sharedString);
|
||||||
|
if (list.Count >= 10000)
|
||||||
|
{
|
||||||
|
dbList.AddRange(list);
|
||||||
|
list.Clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (list.Count > 0)
|
||||||
|
{
|
||||||
|
dbList.AddRange(list);
|
||||||
|
list.Clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
_sharedStrings = dbList;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_sharedStrings = XmlReaderHelper.GetSharedStrings(stream, _ns).ToList();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
internal List<string> GetSharedStrings()
|
internal IList<string> GetSharedStrings()
|
||||||
{
|
{
|
||||||
if (_sharedStrings == null)
|
if (_sharedStrings == null)
|
||||||
SetSharedStrings();
|
SetSharedStrings();
|
||||||
return _sharedStrings;
|
return _sharedStrings;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
/// Lazily enumerates the string items found in a shared-strings XML stream.
/// </summary>
/// <param name="stream">Stream an <see cref="XmlReader"/> is created over.</param>
/// <returns>
/// One value per "si" start element, produced by <c>StringHelper.ReadStringItem</c>.
/// Nothing is yielded unless the reader is on an "sst" start element (in one of the
/// <c>_ns</c> namespaces) and <c>XmlReaderHelper.ReadFirstContent</c> succeeds.
/// </returns>
private IEnumerable<string> GetSharedStrings(Stream stream)
{
    using (var xml = XmlReader.Create(stream))
    {
        var positioned = XmlReaderHelper.IsStartElement(xml, "sst", _ns)
                         && XmlReaderHelper.ReadFirstContent(xml);
        if (!positioned)
        {
            yield break;
        }

        while (!xml.EOF)
        {
            if (!XmlReaderHelper.IsStartElement(xml, "si", _ns))
            {
                // Not a string item: skip it, and stop once nothing more can be skipped.
                if (!XmlReaderHelper.SkipContent(xml))
                {
                    yield break;
                }

                continue;
            }

            yield return StringHelper.ReadStringItem(xml);
        }
    }
}
|
|
||||||
|
|
||||||
private void SetWorkbookRels(ReadOnlyCollection<ZipArchiveEntry> entries)
|
private void SetWorkbookRels(ReadOnlyCollection<ZipArchiveEntry> entries)
|
||||||
{
|
{
|
||||||
if (_sheetRecords != null)
|
if (_sheetRecords != null)
|
||||||
|
@ -90,7 +90,7 @@ namespace MiniExcelLibs.OpenXml
|
|||||||
private Dictionary<string, XMergeCell> XMergeCellInfos { get; set; }
|
private Dictionary<string, XMergeCell> XMergeCellInfos { get; set; }
|
||||||
public List<XMergeCell> NewXMergeCellInfos { get; private set; }
|
public List<XMergeCell> NewXMergeCellInfos { get; private set; }
|
||||||
|
|
||||||
private void GenerateSheetXmlImpl(ZipArchiveEntry sheetZipEntry, Stream stream, Stream sheetStream, Dictionary<string, object> inputMaps, List<string> sharedStrings, XmlWriterSettings xmlWriterSettings = null)
|
private void GenerateSheetXmlImpl(ZipArchiveEntry sheetZipEntry, Stream stream, Stream sheetStream, Dictionary<string, object> inputMaps, IList<string> sharedStrings, XmlWriterSettings xmlWriterSettings = null)
|
||||||
{
|
{
|
||||||
var doc = new XmlDocument();
|
var doc = new XmlDocument();
|
||||||
doc.Load(sheetStream);
|
doc.Load(sheetStream);
|
||||||
@ -387,7 +387,7 @@ namespace MiniExcelLibs.OpenXml
|
|||||||
.Replace($"xmlns{endPrefix}=\"http://schemas.openxmlformats.org/spreadsheetml/2006/main\"", "");
|
.Replace($"xmlns{endPrefix}=\"http://schemas.openxmlformats.org/spreadsheetml/2006/main\"", "");
|
||||||
}
|
}
|
||||||
|
|
||||||
private void ReplaceSharedStringsToStr(List<string> sharedStrings, ref XmlNodeList rows)
|
private void ReplaceSharedStringsToStr(IList<string> sharedStrings, ref XmlNodeList rows)
|
||||||
{
|
{
|
||||||
foreach (XmlElement row in rows)
|
foreach (XmlElement row in rows)
|
||||||
{
|
{
|
||||||
|
@ -11,5 +11,9 @@ namespace MiniExcelLibs.OpenXml
|
|||||||
public bool AutoFilter { get; set; } = true;
|
public bool AutoFilter { get; set; } = true;
|
||||||
public bool EnableConvertByteArray { get; set; } = true;
|
public bool EnableConvertByteArray { get; set; } = true;
|
||||||
public bool IgnoreTemplateParameterMissing { get; set; } = true;
|
public bool IgnoreTemplateParameterMissing { get; set; } = true;
|
||||||
|
|
||||||
|
// Currently, this is a preview feature.
|
||||||
|
public bool EnableSharedStringCache { get; set; } = false;
|
||||||
|
public int SharedStringCacheSize { get; set; } = 5 * 1024 * 1024;
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -1,4 +1,6 @@
|
|||||||
using System.Linq;
|
using System.Collections.Generic;
|
||||||
|
using System.IO;
|
||||||
|
using System.Linq;
|
||||||
|
|
||||||
namespace MiniExcelLibs.Utils
|
namespace MiniExcelLibs.Utils
|
||||||
{
|
{
|
||||||
@ -74,6 +76,31 @@ namespace MiniExcelLibs.Utils
|
|||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Lazily enumerates the string items found in a shared-strings XML stream.
/// </summary>
/// <param name="stream">Stream an <see cref="XmlReader"/> is created over.</param>
/// <param name="nss">Namespaces passed to <c>IsStartElement</c> when matching elements.</param>
/// <returns>
/// One value per "si" start element, produced by <c>StringHelper.ReadStringItem</c>.
/// Nothing is yielded unless the reader is on an "sst" start element and
/// <c>ReadFirstContent</c> succeeds. Because this is an iterator, no reading happens
/// until the result is enumerated.
/// </returns>
public static IEnumerable<string> GetSharedStrings(Stream stream, params string[] nss)
{
    using (var xml = XmlReader.Create(stream))
    {
        var positioned = XmlReaderHelper.IsStartElement(xml, "sst", nss)
                         && XmlReaderHelper.ReadFirstContent(xml);
        if (!positioned)
        {
            yield break;
        }

        while (!xml.EOF)
        {
            if (!XmlReaderHelper.IsStartElement(xml, "si", nss))
            {
                // Not a string item: skip it, and stop once nothing more can be skipped.
                if (!XmlReaderHelper.SkipContent(xml))
                {
                    yield break;
                }

                continue;
            }

            yield return StringHelper.ReadStringItem(xml);
        }
    }
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -9,17 +9,25 @@ using System.Data;
|
|||||||
using ExcelDataReader;
|
using ExcelDataReader;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using System.Data.SQLite;
|
using System.Data.SQLite;
|
||||||
|
using System.Diagnostics;
|
||||||
using Dapper;
|
using Dapper;
|
||||||
using System.Globalization;
|
using System.Globalization;
|
||||||
using static MiniExcelLibs.Tests.Utils.MiniExcelOpenXml;
|
using static MiniExcelLibs.Tests.Utils.MiniExcelOpenXml;
|
||||||
using MiniExcelLibs.Tests.Utils;
|
using MiniExcelLibs.Tests.Utils;
|
||||||
using MiniExcelLibs.Attributes;
|
using MiniExcelLibs.Attributes;
|
||||||
|
using MiniExcelLibs.OpenXml;
|
||||||
|
using Xunit.Abstractions;
|
||||||
|
|
||||||
namespace MiniExcelLibs.Tests
|
namespace MiniExcelLibs.Tests
|
||||||
{
|
{
|
||||||
public partial class MiniExcelOpenXmlTests
|
public partial class MiniExcelOpenXmlTests
|
||||||
{
|
{
|
||||||
|
private readonly ITestOutputHelper output;
|
||||||
|
/// <summary>
/// Stores the xUnit output helper so tests can write diagnostic lines.
/// </summary>
/// <param name="output">Output helper supplied to the constructor.</param>
public MiniExcelOpenXmlTests(ITestOutputHelper output)
{
    this.output = output;
}
|
||||||
|
|
||||||
[Fact]
|
[Fact]
|
||||||
public void GetColumnsTest()
|
public void GetColumnsTest()
|
||||||
{
|
{
|
||||||
@ -1161,5 +1169,34 @@ namespace MiniExcelLibs.Tests
|
|||||||
Assert.Equal(rows[1].B , "value2");
|
Assert.Equal(rows[1].B , "value2");
|
||||||
Assert.Equal(rows[1].C , "value3");
|
Assert.Equal(rows[1].C , "value3");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Reads the first row of the large shared-strings benchmark workbook with the
/// shared-string cache enabled and logs working-set size and elapsed time.
/// </summary>
[Fact]
public void SharedStringCacheTest()
{
    var path = "../../../../../benchmarks/MiniExcel.Benchmarks/Test1,000,000x10_SharingStrings.xlsx";

    var stopWatch = Stopwatch.StartNew();
    // The cache is off by default, so it has to be switched on explicitly;
    // otherwise this test measures the same in-memory path as the no-cache test.
    MiniExcel.Query(path,
        configuration: new OpenXmlConfiguration() { EnableSharedStringCache = true }).First();
    stopWatch.Stop();

    // Process wraps an OS handle and must be disposed.
    using (var currentProcess = Process.GetCurrentProcess())
    {
        long totalBytesOfMemoryUsed = currentProcess.WorkingSet64;
        output.WriteLine("totalBytesOfMemoryUsed: " + totalBytesOfMemoryUsed);
    }

    output.WriteLine("elapsedMilliseconds: " + stopWatch.ElapsedMilliseconds);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Reads the first row of the large shared-strings benchmark workbook with the cache
/// size threshold set to <see cref="int.MaxValue"/> and logs working-set size and
/// elapsed time.
/// </summary>
[Fact]
public void SharedStringNoCacheTest()
{
    var path = "../../../../../benchmarks/MiniExcel.Benchmarks/Test1,000,000x10_SharingStrings.xlsx";

    var stopWatch = Stopwatch.StartNew();
    MiniExcel.Query(path,
        configuration: new OpenXmlConfiguration() { SharedStringCacheSize = int.MaxValue }).First();
    // Stop before logging so the reported time covers only the query.
    stopWatch.Stop();

    // Process wraps an OS handle and must be disposed.
    using (var currentProcess = Process.GetCurrentProcess())
    {
        long totalBytesOfMemoryUsed = currentProcess.WorkingSet64;
        output.WriteLine("totalBytesOfMemoryUsed: " + totalBytesOfMemoryUsed);
    }

    output.WriteLine("elapsedMilliseconds: " + stopWatch.ElapsedMilliseconds);
}
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -21,7 +21,7 @@
|
|||||||
<PackageReference Include="ExcelDataReader" Version="3.6.0" />
|
<PackageReference Include="ExcelDataReader" Version="3.6.0" />
|
||||||
<PackageReference Include="ExcelDataReader.DataSet" Version="3.6.0" />
|
<PackageReference Include="ExcelDataReader.DataSet" Version="3.6.0" />
|
||||||
<PackageReference Include="Newtonsoft.Json" Version="13.0.1" />
|
<PackageReference Include="Newtonsoft.Json" Version="13.0.1" />
|
||||||
<PackageReference Include="System.Data.SQLite.Core" Version="1.0.113.7" />
|
<PackageReference Include="System.Data.SQLite.Core" Version="1.0.115.5" />
|
||||||
<PackageReference Include="System.Text.Encoding.CodePages" Version="5.0.0" />
|
<PackageReference Include="System.Text.Encoding.CodePages" Version="5.0.0" />
|
||||||
<PackageReference Include="xunit" Version="2.4.1" />
|
<PackageReference Include="xunit" Version="2.4.1" />
|
||||||
<PackageReference Include="xunit.runner.visualstudio" Version="2.4.1">
|
<PackageReference Include="xunit.runner.visualstudio" Version="2.4.1">
|
||||||
|
Loading…
Reference in New Issue
Block a user