diff --git a/drafts/【MiniExcel】Auto specify file or stream from xlsx or xls or csv.linq b/drafts/【MiniExcel】Auto specify file or stream from xlsx or xls or csv.linq
new file mode 100644
index 0000000..0540e29
--- /dev/null
+++ b/drafts/【MiniExcel】Auto specify file or stream from xlsx or xls or csv.linq
@@ -0,0 +1,117 @@
+
+  Dapper
+  MiniExcel
+  Newtonsoft.Json
+  System.Data.SqlClient
+  MiniExcelLibs
+  Newtonsoft.Json
+  System.Data
+  System.Diagnostics
+  System.Linq.Expressions
+  System.Text
+  System.Text.RegularExpressions
+  System.Threading
+  System.Transactions
+  System.Xml
+  System.Xml.Linq
+  System.Xml.XPath
+
+
+void Main()
+{
+    var files = Directory.GetFiles(@"D:\git\ExcelDataReader\test\Resources");
+    foreach (var path in files)
+    {
+        var typeFromPath = Helpers.GetExcelType(path);
+        Helpers.ExcelType typeFromStream;
+        // Open the file once and dispose it, rather than leaking a handle per call.
+        using (var stream = File.OpenRead(path))
+            typeFromStream = Helpers.GetExcelType(stream);
+
+        if (typeFromPath != typeFromStream)
+            Console.WriteLine($"{path} : {typeFromPath} and {typeFromStream}");
+    }
+}
+
+// You can define other methods, fields, classes and namespaces here
+internal static class Helpers
+{
+    /// <summary>Spreadsheet formats the helpers can report.</summary>
+    public enum ExcelType
+    {
+        XLSX,
+        XLS,
+        CSV,
+        UNKNOWN
+    }
+
+    /// <summary>Maps a file extension (case-insensitive) to a format.</summary>
+    /// <param name="path">File name or path; only its extension is inspected.</param>
+    /// <returns>The matching type, or <see cref="ExcelType.UNKNOWN"/> for any other extension.</returns>
+    /// <exception cref="ArgumentNullException"><paramref name="path"/> is null.</exception>
+    internal static ExcelType GetExcelType(string path)
+    {
+        if (path == null)
+            throw new ArgumentNullException(nameof(path));
+
+        switch (Path.GetExtension(path).ToLowerInvariant())
+        {
+            case ".csv":
+                return ExcelType.CSV;
+            case ".xlsx":
+                return ExcelType.XLSX;
+            case ".xls":
+                return ExcelType.XLS;
+            default:
+                return ExcelType.UNKNOWN;
+        }
+    }
+
+    // modified from : [.net - How to know stream is xlsx or xls or csv? - Stack Overflow](https://stackoverflow.com/questions/66731497/how-to-know-stream-is-xlsx-or-xls-or-csv/66765911#66765911)
+    /// <summary>
+    /// Sniffs the first four bytes of the stream for a known signature. On seekable streams the position
+    /// is restored afterwards so the caller can still read the content from where it was.
+    /// </summary>
+    /// <param name="stream">Readable stream positioned at the start of the content.</param>
+    /// <returns>XLS or XLSX when a signature matches; otherwise CSV (including input shorter than four bytes).</returns>
+    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
+    internal static ExcelType GetExcelType(Stream stream)
+    {
+        if (stream == null)
+            throw new ArgumentNullException(nameof(stream));
+
+        // Remember where the caller was so that sniffing does not consume data.
+        var origin = stream.CanSeek ? stream.Position : -1L;
+
+        // Only the first four bytes matter; Stream.Read may return fewer bytes
+        // than requested, so keep reading until the signature is complete or EOF.
+        var signature = new byte[4];
+        var filled = 0;
+        while (filled < signature.Length)
+        {
+            var n = stream.Read(signature, filled, signature.Length - filled);
+            if (n <= 0)
+                break;
+            filled += n;
+        }
+
+        if (origin >= 0)
+            stream.Position = origin;
+
+        // Shorter than any binary signature: keep the original text fallback.
+        if (filled < signature.Length)
+            return ExcelType.CSV;
+
+        switch (BitConverter.ToUInt32(signature, 0))
+        {
+            // Old office format (can be any office file)
+            case 0xE011CFD0:
+                return ExcelType.XLS;
+            // New office format (can be any ZIP archive)
+            case 0x04034B50:
+                return ExcelType.XLSX;
+            default:
+                return ExcelType.CSV;
+        }
+    }
+}
\ No newline at end of file
diff --git a/drafts/【MiniExcel】Avoid Re-New Stream.linq b/drafts/【MiniExcel】Avoid Re-New Stream.linq
new file mode 100644
index 0000000..e767b5d
--- /dev/null
+++ b/drafts/【MiniExcel】Avoid Re-New Stream.linq
@@ -0,0 +1,50 @@
+
+  Dapper
+  MiniExcel
+  Newtonsoft.Json
+  System.Data.SqlClient
+  MiniExcelLibs
+  Newtonsoft.Json
+  System.Data
+  System.Diagnostics
+  System.Linq.Expressions
+  System.Text
+  System.Text.RegularExpressions
+  System.Threading
+  System.Transactions
+  System.Xml
+  System.Xml.Linq
+  System.Xml.XPath
+
+
+// Shows that one stream/reader pair can be read again after rewinding, without reopening the file.
+void Main()
+{
+    var path = @"D:\git\MiniExcel\samples\csv\Test5x2.csv";
+    using (var stream = File.OpenRead(path))
+    using (var reader = new StreamReader(stream))
+    {
+        {
+            var content = reader.ReadToEnd();
+            Console.WriteLine("First Read:");
+            Console.WriteLine(content); //result: A1...
+        }
+        {
+            // The reader is already at end-of-stream, so nothing is left to read.
+            var content = reader.ReadToEnd();
+            Console.WriteLine("Seond Read:");
+            Console.WriteLine(content); //result: empty
+        }
+        {
+            // Rewind the stream and drop the reader's buffered state so both agree
+            // on the new position before reading again.
+            stream.Position = 0;
+            reader.DiscardBufferedData();
+            var content = reader.ReadToEnd();
+            Console.WriteLine("After set position=0 Read:");
+            Console.WriteLine(content); //result: A1...
+        }
+    }
+}
+
+// You can define other methods, fields, classes and namespaces here
diff --git a/drafts/【MiniExcel】CSV Query.linq b/drafts/【MiniExcel】CSV Query.linq
new file mode 100644
index 0000000..b47016f
--- /dev/null
+++ b/drafts/【MiniExcel】CSV Query.linq
@@ -0,0 +1,280 @@
+
+  Dapper
+  MiniExcel
+  Newtonsoft.Json
+  System.Data.SqlClient
+  MiniExcelLibs
+  Newtonsoft.Json
+  Xunit
+  MiniExcelLibs.Utils
+  System.Data
+  System.Linq.Expressions
+  System.Threading
+  System.Transactions
+  System.Xml
+  System.Xml.Linq
+  System.Xml.XPath
+
+
+#load "xunit"
+
+void Main()
+{
+    RunTests();  // Call RunTests() or press Alt+Shift+T to initiate testing.
+}
+
+[Fact()]
+public void TestReadFirstFromLargeFile()
+{
+    var path = @"D:\git\MiniExcel\samples\csv\TestLargeFile_1,000,000.csv";
+    using (var stream = File.OpenRead(path))
+    {
+        var rows = MiniExcel.Query(stream, false).Take(2).ToList();
+        Assert.Equal("Id", rows[0].A);
+        Assert.Equal("Text", rows[0].B);
+        Assert.Equal("0", rows[1].A);
+        Assert.Equal("Hello World", rows[1].B);
+    }
+}
+
+[Fact()]
+public void TestReadHeader()
+{
+    var path = @"D:\git\MiniExcel\samples\csv\TestHeader.csv";
+    using (var stream = File.OpenRead(path))
+    {
+        var rows = MiniExcel.Query(stream, true).ToList();
+        Assert.Equal("A1", rows[0].Column1);
+        Assert.Equal("B1", rows[0].Column2);
+        Assert.Equal("A2", rows[1].Column1);
+        Assert.Equal("B2", rows[1].Column2);
+    }
+}
+
+// You can define other methods, fields, classes and namespaces here
+/// <summary>Entry point for querying a CSV stream as dynamic rows.</summary>
+public static class MiniExcel
+{
+    /// <summary>Reads CSV rows lazily from <paramref name="stream"/>.</summary>
+    /// <param name="stream">Source stream; the reader created over it is disposed when enumeration completes.</param>
+    /// <param name="useHeaderRow">True to use the first row as column names; false to key cells by A, B, C...</param>
+    /// <param name="configuration">Optional <see cref="CsvConfiguration"/>; null selects the defaults.</param>
+    /// <returns>One dictionary-backed dynamic object per data row.</returns>
+    public static IEnumerable<dynamic> Query(this FileStream stream, bool useHeaderRow, IConfiguration configuration = null)
+    {
+        return CsvReader.Query(stream, useHeaderRow, (CsvConfiguration)configuration);
+    }
+}
+
+/// <summary>Marker interface for reader configuration objects.</summary>
+public interface IConfiguration
+{
+}
+
+/// <summary>Settings used when parsing CSV text.</summary>
+public class CsvConfiguration : IConfiguration
+{
+    /// <summary>Character that separates fields within a line.</summary>
+    public char Seperator { get; set; }
+    /// <summary>Factory that wraps the source stream in a <see cref="StreamReader"/>.</summary>
+    public Func<Stream, StreamReader> GetStreamReaderFunc { get; set; }
+    private static readonly CsvConfiguration _defaultConfiguration = new CsvConfiguration()
+    {
+        Seperator = ',',
+        GetStreamReaderFunc = (stream) => new StreamReader(stream)
+    };
+    /// <summary>Returns the shared default configuration (comma separator, plain <see cref="StreamReader"/>).</summary>
+    internal static CsvConfiguration GetDefaultConfiguration() => _defaultConfiguration;
+}
+
+/// <summary>Line-based CSV reader that splits each line on the separator (no quoting or escaping).</summary>
+public class CsvReader
+{
+    /// <summary>
+    /// Lazily reads the stream line by line and yields one row per line.
+    /// </summary>
+    /// <param name="stream">Source stream handed to the configured reader factory.</param>
+    /// <param name="useHeaderRow">True to consume the first line as column names; false to key cells by A, B, C...</param>
+    /// <param name="configuration">Parsing settings; null selects the defaults.</param>
+    /// <returns>Rows as string-keyed dictionaries backed by ExpandoObject.</returns>
+    /// <exception cref="InvalidDataException">In header mode, a data row has more fields than the header row.</exception>
+    internal static IEnumerable<IDictionary<string, object>> Query(FileStream stream, bool useHeaderRow, CsvConfiguration configuration)
+    {
+        if (configuration == null)
+            configuration = CsvConfiguration.GetDefaultConfiguration();
+        using (var reader = configuration.GetStreamReaderFunc(stream))
+        {
+            char[] seperators = { configuration.Seperator };
+
+            string row;
+            string[] read;
+            var firstRow = true;
+            var rowNumber = 0;
+            Dictionary<int, string> headRows = new Dictionary<int, string>();
+            while ((row = reader.ReadLine()) != null)
+            {
+                rowNumber++;
+                read = row.Split(seperators, StringSplitOptions.None);
+
+                //header
+                if (useHeaderRow)
+                {
+                    if (firstRow)
+                    {
+                        firstRow = false;
+                        for (int i = 0; i <= read.Length - 1; i++)
+                            headRows.Add(i, read[i]);
+                        continue;
+                    }
+
+                    // A longer row has no header to map its extra fields to; fail with
+                    // a clear message instead of a KeyNotFoundException from the lookup.
+                    if (read.Length > headRows.Count)
+                        throw new InvalidDataException($"CSV row {rowNumber} has {read.Length} fields but the header row defines {headRows.Count}.");
+
+                    var cell = Helpers.GetEmptyExpandoObject(headRows);
+                    for (int i = 0; i <= read.Length - 1; i++)
+                        cell[headRows[i]] = read[i];
+
+                    yield return cell;
+                    continue;
+                }
+
+                //body
+                {
+                    var cell = Helpers.GetEmptyExpandoObject(read.Length - 1);
+                    for (int i = 0; i <= read.Length - 1; i++)
+                        cell[Helpers.GetAlphabetColumnName(i)] = read[i];
+                    yield return cell;
+                }
+            }
+        }
+    }
+}
+
+namespace MiniExcelLibs.Utils
+{
+    using System;
+    using System.Collections;
+    using System.Collections.Generic;
+    using System.Dynamic;
+    using System.Globalization;
+    using System.Linq;
+    using System.Reflection;
+    using System.Text.RegularExpressions;
+
+    /// <summary>Column-name, row-object and reflection utilities.</summary>
+    internal static class Helpers
+    {
+        // Lookup caches for the first 256 columns (A..IV), filled once by the static constructor.
+        private static readonly Dictionary<int, string> _IntMappingAlphabet = new Dictionary<int, string>();
+        private static readonly Dictionary<string, int> _AlphabetMappingInt = new Dictionary<string, int>();
+        static Helpers()
+        {
+            for (int i = 0; i <= 255; i++)
+            {
+                var letters = IntToLetters(i);
+                _IntMappingAlphabet.Add(i, letters);
+                _AlphabetMappingInt.Add(letters, i);
+            }
+        }
+
+        /// <summary>Returns the column letters for a zero-based index (0 is A, 26 is AA).</summary>
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="ColumnIndex"/> is negative.</exception>
+        public static string GetAlphabetColumnName(int ColumnIndex)
+        {
+            if (ColumnIndex < 0)
+                throw new ArgumentOutOfRangeException(nameof(ColumnIndex));
+
+            // Indexes beyond the cache are computed on demand instead of throwing KeyNotFoundException.
+            string name;
+            return _IntMappingAlphabet.TryGetValue(ColumnIndex, out name) ? name : IntToLetters(ColumnIndex);
+        }
+
+        /// <summary>Returns the zero-based index for cached column letters (A..IV); other names throw KeyNotFoundException.</summary>
+        public static int GetColumnIndex(string columnName) => _AlphabetMappingInt[columnName];
+
+        /// <summary>Converts a zero-based index to bijective base-26 letters (0 is A, 25 is Z, 26 is AA).</summary>
+        internal static string IntToLetters(int value)
+        {
+            value = value + 1;
+            string result = string.Empty;
+            while (--value >= 0)
+            {
+                result = (char)('A' + value % 26) + result;
+                value /= 26;
+            }
+            return result;
+        }
+
+        /// <summary>Creates a row whose keys are the column letters 0..<paramref name="maxColumnIndex"/>, all set to null.</summary>
+        public static IDictionary<string, object> GetEmptyExpandoObject(int maxColumnIndex)
+        {
+            // TODO: strong type mapping can ignore this
+            // TODO: it can recode better performance
+            var cell = (IDictionary<string, object>)new ExpandoObject();
+            for (int i = 0; i <= maxColumnIndex; i++)
+            {
+                var key = GetAlphabetColumnName(i);
+                if (!cell.ContainsKey(key))
+                    cell.Add(key, null);
+            }
+            return cell;
+        }
+
+        /// <summary>Creates a row whose keys are the distinct header names in <paramref name="hearrows"/>, all set to null.</summary>
+        public static IDictionary<string, object> GetEmptyExpandoObject(Dictionary<int, string> hearrows)
+        {
+            // TODO: strong type mapping can ignore this
+            // TODO: it can recode better performance
+            var cell = (IDictionary<string, object>)new ExpandoObject();
+            foreach (var hr in hearrows)
+                if (!cell.ContainsKey(hr.Value))
+                    cell.Add(hr.Value, null);
+            return cell;
+        }
+
+        /// <summary>Returns the public instance properties of <paramref name="type"/> that have a public setter.</summary>
+        public static IEnumerable<PropertyInfo> GetPropertiesWithSetter(this Type type)
+        {
+            return type.GetProperties(BindingFlags.SetProperty |
+                          BindingFlags.Public |
+                          BindingFlags.Instance).Where(prop => prop.GetSetMethod() != null);
+        }
+
+        /// <summary>Returns the public instance properties of the element type of a generic collection or array.</summary>
+        /// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
+        public static PropertyInfo[] GetSubtypeProperties(ICollection value)
+        {
+            if (value == null)
+                throw new ArgumentNullException(nameof(value));
+
+            var collectionType = value.GetType();
+
+            Type gType;
+            if (collectionType.IsGenericTypeDefinition || collectionType.IsGenericType)
+                gType = collectionType.GetGenericArguments().Single();
+            else if (collectionType.IsArray)
+                gType = collectionType.GetElementType();
+            else
+                throw new NotImplementedException($"{collectionType.Name} type not implemented,please issue for me, https://github.com/shps951023/MiniExcel/issues");
+            if (typeof(IDictionary).IsAssignableFrom(gType))
+                throw new NotImplementedException($"{gType.Name} type not implemented,please issue for me, https://github.com/shps951023/MiniExcel/issues");
+            var props = gType.GetProperties(BindingFlags.Public | BindingFlags.Instance);
+            if (props.Length == 0)
+                throw new InvalidOperationException($"Properties count is 0");
+            return props;
+        }
+
+        private static readonly Regex EscapeRegex = new Regex("_x([0-9A-F]{4,4})_");
+        /// <summary>Replaces each <c>_xHHHH_</c> escape (four uppercase hex digits) with the character it encodes.</summary>
+        public static string ConvertEscapeChars(string input)
+        {
+            return EscapeRegex.Replace(input, m => ((char)uint.Parse(m.Groups[1].Value, NumberStyles.HexNumber)).ToString());
+        }
+
+    }
+
+}
+
+