mirror of
https://gitee.com/dotnetchina/MiniExcel.git
synced 2024-11-29 18:38:08 +08:00
feat: cache sharedstrings in sqlite
This commit is contained in:
parent
bbe29ae1b3
commit
65ad0f7e4c
175
src/MiniExcel/DbList.cs
Normal file
175
src/MiniExcel/DbList.cs
Normal file
@ -0,0 +1,175 @@
|
||||
using System;
|
||||
using System.Collections;
|
||||
using System.Collections.Generic;
|
||||
using System.Data.SQLite;
|
||||
using System.IO;
|
||||
using System.Text;
|
||||
|
||||
namespace MiniExcelLibs
|
||||
{
|
||||
public class DbList : IList<string>, IDisposable
|
||||
{
|
||||
private SQLiteConnection _conn;
|
||||
private SQLiteCommand _cmd;
|
||||
private string _name;
|
||||
private const string _tableName = "sharedStrings";
|
||||
|
||||
public DbList(string name)
|
||||
{
|
||||
_name = name;
|
||||
_conn = new SQLiteConnection($"Data Source={name}.db;Version=3;");
|
||||
_conn.Open();
|
||||
_cmd = _conn.CreateCommand();
|
||||
|
||||
CreateTable();
|
||||
}
|
||||
|
||||
private void CreateTable()
|
||||
{
|
||||
Clear();
|
||||
_cmd.CommandText = $@"
|
||||
CREATE TABLE {_tableName} (name TEXT, `index` INTEGER);
|
||||
|
||||
CREATE UNIQUE INDEX idx_index
|
||||
ON sharedStrings (
|
||||
`index`
|
||||
);
|
||||
|
||||
CREATE INDEX idx_name
|
||||
ON sharedStrings (
|
||||
name
|
||||
);";
|
||||
_cmd.ExecuteNonQuery();
|
||||
}
|
||||
|
||||
public IEnumerator<string> GetEnumerator()
|
||||
{
|
||||
throw new System.NotImplementedException();
|
||||
}
|
||||
|
||||
IEnumerator IEnumerable.GetEnumerator()
|
||||
{
|
||||
throw new System.NotImplementedException();
|
||||
}
|
||||
|
||||
public void Add(string item)
|
||||
{
|
||||
var maxIndex = GetMaxIndex();
|
||||
_cmd.CommandText = $"INSERT INTO {_tableName}(name, `index`) VALUES ('{item}', {maxIndex + 1})";
|
||||
_cmd.ExecuteNonQuery();
|
||||
}
|
||||
|
||||
private long GetMaxIndex()
|
||||
{
|
||||
_cmd.CommandText = $"SELECT MAX(`index`) FROM {_tableName}";
|
||||
var result = _cmd.ExecuteScalar();
|
||||
if (result == DBNull.Value)
|
||||
return -1;
|
||||
|
||||
return (long)result;
|
||||
}
|
||||
|
||||
public void Clear()
|
||||
{
|
||||
_cmd.CommandText = $"DROP TABLE IF EXISTS {_tableName}";
|
||||
_cmd.ExecuteNonQuery();
|
||||
}
|
||||
|
||||
public bool Contains(string item)
|
||||
{
|
||||
_cmd.CommandText = $"SELECT * FROM {_tableName} WHERE name = '{item}'";
|
||||
return _cmd.ExecuteScalar() != null;
|
||||
}
|
||||
|
||||
public void CopyTo(string[] array, int arrayIndex)
|
||||
{
|
||||
throw new System.NotImplementedException();
|
||||
}
|
||||
|
||||
public void AddRange(List<string> array)
|
||||
{
|
||||
var maxIndex = GetMaxIndex();
|
||||
|
||||
var cmdTxt = new StringBuilder();
|
||||
|
||||
cmdTxt.Append($"INSERT INTO {_tableName}(name, `index`) VALUES");
|
||||
for (var i = 0; i < array.Count; i++)
|
||||
{
|
||||
var item = array[i];
|
||||
cmdTxt.Append($"('{item}', {maxIndex + i + 1})");
|
||||
cmdTxt.Append(i != array.Count - 1 ? ',' : ';');
|
||||
}
|
||||
|
||||
_cmd.CommandText = cmdTxt.ToString();
|
||||
_cmd.ExecuteNonQuery();
|
||||
}
|
||||
|
||||
public bool Remove(string item)
|
||||
{
|
||||
_cmd.CommandText = $"DELETE FROM {_tableName} WHERE name = '{item}'";
|
||||
return _cmd.ExecuteNonQuery() > 0;
|
||||
}
|
||||
|
||||
public int Count
|
||||
{
|
||||
get
|
||||
{
|
||||
_cmd.CommandText = "SELECT COUNT(*) FROM " + _tableName;
|
||||
return Convert.ToInt32(_cmd.ExecuteScalar());
|
||||
}
|
||||
}
|
||||
|
||||
public bool IsReadOnly { get; }
|
||||
|
||||
public int IndexOf(string item)
|
||||
{
|
||||
_cmd.CommandText = $"SELECT `index` FROM {_tableName} WHERE name = '{item}'";
|
||||
return (int)_cmd.ExecuteScalar();
|
||||
}
|
||||
|
||||
public void Insert(int index, string item)
|
||||
{
|
||||
_cmd.CommandText = $"UPDATE {_tableName} SET `index` = `index` + 1 WHERE index >= {index}";
|
||||
_cmd.ExecuteNonQuery();
|
||||
_cmd.CommandText = $"INSERT INTO {_tableName}(name, `index`) VALUES ('{item}', {index})";
|
||||
_cmd.ExecuteNonQuery();
|
||||
}
|
||||
|
||||
public void RemoveAt(int index)
|
||||
{
|
||||
_cmd.CommandText = $"DELETE FROM {_tableName} WHERE `index` = {index}";
|
||||
_cmd.ExecuteNonQuery();
|
||||
}
|
||||
|
||||
public string this[int index]
|
||||
{
|
||||
get
|
||||
{
|
||||
_cmd.CommandText = $"SELECT name FROM {_tableName} WHERE `index` = {index}";
|
||||
return (string)_cmd.ExecuteScalar();
|
||||
}
|
||||
set
|
||||
{
|
||||
_cmd.CommandText = $"UPDATE {_tableName} SET name = '{value}' WHERE `index` = {index}";
|
||||
_cmd.ExecuteNonQuery();
|
||||
}
|
||||
}
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
if (_cmd != null)
|
||||
{
|
||||
_cmd.Dispose();
|
||||
_cmd = null;
|
||||
}
|
||||
|
||||
if (_conn != null)
|
||||
{
|
||||
_conn.Dispose();
|
||||
_conn = null;
|
||||
}
|
||||
|
||||
File.Delete($"{_name}.db");
|
||||
}
|
||||
}
|
||||
}
|
@ -44,5 +44,6 @@ Todo : https://github.com/shps951023/MiniExcel/projects/1?fullscreen=true
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="ExcelNumberFormat" Version="1.1.0" />
|
||||
<PackageReference Include="System.Data.SQLite.Core" Version="1.0.115.5" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
|
@ -17,7 +17,7 @@ namespace MiniExcelLibs.OpenXml
|
||||
private static readonly string[] _ns = { Config.SpreadsheetmlXmlns, Config.SpreadsheetmlXmlStrictns };
|
||||
private static readonly string[] _relationshiopNs = { Config.SpreadsheetmlXmlRelationshipns, Config.SpreadsheetmlXmlStrictRelationshipns };
|
||||
private List<SheetRecord> _sheetRecords;
|
||||
private List<string> _sharedStrings;
|
||||
private IList<string> _sharedStrings;
|
||||
private MergeCells _mergeCells;
|
||||
private ExcelOpenXmlStyles _style;
|
||||
private readonly ExcelOpenXmlZip _archive;
|
||||
@ -490,42 +490,44 @@ namespace MiniExcelLibs.OpenXml
|
||||
return;
|
||||
using (var stream = sharedStringsEntry.Open())
|
||||
{
|
||||
_sharedStrings = GetSharedStrings(stream).ToList();
|
||||
if (_config.EnableSharedStringCache && sharedStringsEntry.Length >= _config.SharedStringCacheSize)
|
||||
{
|
||||
// use sqlite
|
||||
var dbList = new DbList(Guid.NewGuid().ToString());
|
||||
|
||||
var list = new List<string>();
|
||||
foreach (var sharedString in XmlReaderHelper.GetSharedStrings(stream, _ns))
|
||||
{
|
||||
list.Add(sharedString);
|
||||
if (list.Count >= 10000)
|
||||
{
|
||||
dbList.AddRange(list);
|
||||
list.Clear();
|
||||
}
|
||||
}
|
||||
|
||||
internal List<string> GetSharedStrings()
|
||||
if (list.Count > 0)
|
||||
{
|
||||
dbList.AddRange(list);
|
||||
list.Clear();
|
||||
}
|
||||
|
||||
_sharedStrings = dbList;
|
||||
}
|
||||
else
|
||||
{
|
||||
_sharedStrings = XmlReaderHelper.GetSharedStrings(stream, _ns).ToList();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
internal IList<string> GetSharedStrings()
|
||||
{
|
||||
if (_sharedStrings == null)
|
||||
SetSharedStrings();
|
||||
return _sharedStrings;
|
||||
}
|
||||
|
||||
private IEnumerable<string> GetSharedStrings(Stream stream)
|
||||
{
|
||||
using (var reader = XmlReader.Create(stream))
|
||||
{
|
||||
if (!XmlReaderHelper.IsStartElement(reader, "sst", _ns))
|
||||
yield break;
|
||||
|
||||
if (!XmlReaderHelper.ReadFirstContent(reader))
|
||||
yield break;
|
||||
|
||||
while (!reader.EOF)
|
||||
{
|
||||
if (XmlReaderHelper.IsStartElement(reader, "si", _ns))
|
||||
{
|
||||
var value = StringHelper.ReadStringItem(reader);
|
||||
yield return value;
|
||||
}
|
||||
else if (!XmlReaderHelper.SkipContent(reader))
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void SetWorkbookRels(ReadOnlyCollection<ZipArchiveEntry> entries)
|
||||
{
|
||||
if (_sheetRecords != null)
|
||||
|
@ -90,7 +90,7 @@ namespace MiniExcelLibs.OpenXml
|
||||
private Dictionary<string, XMergeCell> XMergeCellInfos { get; set; }
|
||||
public List<XMergeCell> NewXMergeCellInfos { get; private set; }
|
||||
|
||||
private void GenerateSheetXmlImpl(ZipArchiveEntry sheetZipEntry, Stream stream, Stream sheetStream, Dictionary<string, object> inputMaps, List<string> sharedStrings, XmlWriterSettings xmlWriterSettings = null)
|
||||
private void GenerateSheetXmlImpl(ZipArchiveEntry sheetZipEntry, Stream stream, Stream sheetStream, Dictionary<string, object> inputMaps, IList<string> sharedStrings, XmlWriterSettings xmlWriterSettings = null)
|
||||
{
|
||||
var doc = new XmlDocument();
|
||||
doc.Load(sheetStream);
|
||||
@ -387,7 +387,7 @@ namespace MiniExcelLibs.OpenXml
|
||||
.Replace($"xmlns{endPrefix}=\"http://schemas.openxmlformats.org/spreadsheetml/2006/main\"", "");
|
||||
}
|
||||
|
||||
private void ReplaceSharedStringsToStr(List<string> sharedStrings, ref XmlNodeList rows)
|
||||
private void ReplaceSharedStringsToStr(IList<string> sharedStrings, ref XmlNodeList rows)
|
||||
{
|
||||
foreach (XmlElement row in rows)
|
||||
{
|
||||
|
@ -11,5 +11,9 @@ namespace MiniExcelLibs.OpenXml
|
||||
public bool AutoFilter { get; set; } = true;
|
||||
public bool EnableConvertByteArray { get; set; } = true;
|
||||
public bool IgnoreTemplateParameterMissing { get; set; } = true;
|
||||
|
||||
        // This is currently a preview feature.
|
||||
public bool EnableSharedStringCache { get; set; } = false;
|
||||
public int SharedStringCacheSize { get; set; } = 5 * 1024 * 1024;
|
||||
}
|
||||
}
|
@ -1,4 +1,6 @@
|
||||
using System.Linq;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
|
||||
namespace MiniExcelLibs.Utils
|
||||
{
|
||||
@ -74,6 +76,31 @@ namespace MiniExcelLibs.Utils
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
public static IEnumerable<string> GetSharedStrings(Stream stream, params string[] nss)
|
||||
{
|
||||
using (var reader = XmlReader.Create(stream))
|
||||
{
|
||||
if (!XmlReaderHelper.IsStartElement(reader, "sst", nss))
|
||||
yield break;
|
||||
|
||||
if (!XmlReaderHelper.ReadFirstContent(reader))
|
||||
yield break;
|
||||
|
||||
while (!reader.EOF)
|
||||
{
|
||||
if (XmlReaderHelper.IsStartElement(reader, "si", nss))
|
||||
{
|
||||
var value = StringHelper.ReadStringItem(reader);
|
||||
yield return value;
|
||||
}
|
||||
else if (!XmlReaderHelper.SkipContent(reader))
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -9,16 +9,24 @@ using System.Data;
|
||||
using ExcelDataReader;
|
||||
using System.Collections.Generic;
|
||||
using System.Data.SQLite;
|
||||
using System.Diagnostics;
|
||||
using Dapper;
|
||||
using System.Globalization;
|
||||
using static MiniExcelLibs.Tests.Utils.MiniExcelOpenXml;
|
||||
using MiniExcelLibs.Tests.Utils;
|
||||
using MiniExcelLibs.Attributes;
|
||||
using MiniExcelLibs.OpenXml;
|
||||
using Xunit.Abstractions;
|
||||
|
||||
namespace MiniExcelLibs.Tests
|
||||
{
|
||||
public partial class MiniExcelOpenXmlTests
|
||||
{
|
||||
private readonly ITestOutputHelper output;
|
||||
public MiniExcelOpenXmlTests(ITestOutputHelper output)
|
||||
{
|
||||
this.output = output;
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void GetColumnsTest()
|
||||
@ -1161,5 +1169,34 @@ namespace MiniExcelLibs.Tests
|
||||
Assert.Equal(rows[1].B , "value2");
|
||||
Assert.Equal(rows[1].C , "value3");
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void SharedStringCacheTest()
|
||||
{
|
||||
var path = "../../../../../benchmarks/MiniExcel.Benchmarks/Test1,000,000x10_SharingStrings.xlsx";
|
||||
var stopWatch = new Stopwatch();
|
||||
stopWatch.Start();
|
||||
MiniExcel.Query(path).First();
|
||||
Process currentProcess = Process.GetCurrentProcess();
|
||||
long totalBytesOfMemoryUsed = currentProcess.WorkingSet64;
|
||||
output.WriteLine("totalBytesOfMemoryUsed: " + totalBytesOfMemoryUsed);
|
||||
output.WriteLine("elapsedMilliseconds: " + stopWatch.ElapsedMilliseconds);
|
||||
stopWatch.Stop();
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void SharedStringNoCacheTest()
|
||||
{
|
||||
var path = "../../../../../benchmarks/MiniExcel.Benchmarks/Test1,000,000x10_SharingStrings.xlsx";
|
||||
var stopWatch = new Stopwatch();
|
||||
stopWatch.Start();
|
||||
MiniExcel.Query(path,
|
||||
configuration: new OpenXmlConfiguration() { SharedStringCacheSize = int.MaxValue }).First();
|
||||
Process currentProcess = Process.GetCurrentProcess();
|
||||
long totalBytesOfMemoryUsed = currentProcess.WorkingSet64;
|
||||
output.WriteLine("totalBytesOfMemoryUsed: " + totalBytesOfMemoryUsed);
|
||||
output.WriteLine("elapsedMilliseconds: " + stopWatch.ElapsedMilliseconds);
|
||||
stopWatch.Stop();
|
||||
}
|
||||
}
|
||||
}
|
@ -21,7 +21,7 @@
|
||||
<PackageReference Include="ExcelDataReader" Version="3.6.0" />
|
||||
<PackageReference Include="ExcelDataReader.DataSet" Version="3.6.0" />
|
||||
<PackageReference Include="Newtonsoft.Json" Version="13.0.1" />
|
||||
<PackageReference Include="System.Data.SQLite.Core" Version="1.0.113.7" />
|
||||
<PackageReference Include="System.Data.SQLite.Core" Version="1.0.115.5" />
|
||||
<PackageReference Include="System.Text.Encoding.CodePages" Version="5.0.0" />
|
||||
<PackageReference Include="xunit" Version="2.4.1" />
|
||||
<PackageReference Include="xunit.runner.visualstudio" Version="2.4.1">
|
||||
|
Loading…
Reference in New Issue
Block a user