From 45e48bd9e59743362210fe770fc4f9f721d7ccf6 Mon Sep 17 00:00:00 2001 From: fasiondog Date: Thu, 8 Nov 2018 01:38:48 +0800 Subject: [PATCH] data maintain tool(continue) --- hikyuu_python/tools/maintain/hdf5_common.py | 63 +++++ hikyuu_python/tools/maintain/hdf5import.py | 62 +---- hikyuu_python/tools/maintain/pytdx_to_hdf5.py | 256 ++++++++++++++++++ hikyuu_python/tools/maintain/pytdximport.py | 54 ---- ...{sqlite3_baseinfo.py => sqlite3_common.py} | 24 +- 5 files changed, 356 insertions(+), 103 deletions(-) create mode 100644 hikyuu_python/tools/maintain/hdf5_common.py create mode 100644 hikyuu_python/tools/maintain/pytdx_to_hdf5.py delete mode 100644 hikyuu_python/tools/maintain/pytdximport.py rename hikyuu_python/tools/maintain/{sqlite3_baseinfo.py => sqlite3_common.py} (79%) diff --git a/hikyuu_python/tools/maintain/hdf5_common.py b/hikyuu_python/tools/maintain/hdf5_common.py new file mode 100644 index 00000000..f7e54eb7 --- /dev/null +++ b/hikyuu_python/tools/maintain/hdf5_common.py @@ -0,0 +1,63 @@ +# coding:utf-8 +# +# The MIT License (MIT) +# +# Copyright (c) 2010-2017 fasiondog/hikyuu +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import tables as tb + +HDF5_COMPRESS_LEVEL = 9 + +class H5Record(tb.IsDescription): + """HDF5基础K线数据格式(日线、分钟线、5分钟线""" + datetime = tb.UInt64Col() #IGNORE:E1101 + openPrice = tb.UInt32Col() #IGNORE:E1101 + highPrice = tb.UInt32Col() #IGNORE:E1101 + lowPrice = tb.UInt32Col() #IGNORE:E1101 + closePrice = tb.UInt32Col() #IGNORE:E1101 + transAmount = tb.UInt64Col() #IGNORE:E1101 + transCount = tb.UInt64Col() #IGNORE:E1101 + + +class H5Index(tb.IsDescription): + """HDF5扩展K线数据格式(周线、月线、季线、半年线、年线、15分钟线、30分钟线、60分钟线""" + datetime = tb.UInt64Col() #IGNORE:E1101 + start = tb.UInt64Col() #IGNORE:E1101 + + +def open_h5file(dest_dir, market, ktype): + filename = "{}/{}_{}.h5".format(dest_dir, market.lower(), ktype.lower()) + h5file = tb.open_file(filename, "a", filters=tb.Filters(complevel=HDF5_COMPRESS_LEVEL, complib='zlib', shuffle=True)) + return h5file + +def get_h5table(h5file, market, code): + try: + group = h5file.get_node("/", "data") + except: + group = h5file.create_group("/", "data") + + tablename = market.upper() + code + try: + table = h5file.get_node(group, tablename) + except: + table = h5file.create_table(group, tablename, H5Record) + + return table \ No newline at end of file diff --git a/hikyuu_python/tools/maintain/hdf5import.py b/hikyuu_python/tools/maintain/hdf5import.py index 83035dce..7826f81f 100644 --- a/hikyuu_python/tools/maintain/hdf5import.py +++ b/hikyuu_python/tools/maintain/hdf5import.py @@ -15,10 +15,11 @@ import tables as tb from io import SEEK_END, SEEK_SET from common import get_stktype_list, MARKETID -from sqlite3_baseinfo import (create_database, get_marketid, - get_codepre_list, update_last_date) +from sqlite3_common import (create_database, get_marketid, + get_codepre_list, update_last_date) +from hdf5_common import (H5Record, H5Index, + open_h5file, get_h5table) -HDF5_COMPRESS_LEVEL = 9 def ProgressBar(cur, total): percent = '{:.0%}'.format(cur / total) @@ -27,23 +28,6 @@ def ProgressBar(cur, total): sys.stdout.flush() -class H5Record(tb.IsDescription): - """HDF5基础K线数据格式(日线、分钟线、5分钟线""" - datetime = tb.UInt64Col() #IGNORE:E1101 - openPrice = tb.UInt32Col() #IGNORE:E1101 - highPrice = tb.UInt32Col() #IGNORE:E1101 - lowPrice = tb.UInt32Col() #IGNORE:E1101 - closePrice = tb.UInt32Col() #IGNORE:E1101 - transAmount = tb.UInt64Col() #IGNORE:E1101 - transCount = tb.UInt64Col() #IGNORE:E1101 - - -class H5Index(tb.IsDescription): - """HDF5扩展K线数据格式(周线、月线、季线、半年线、年线、15分钟线、30分钟线、60分钟线""" - datetime = tb.UInt64Col() #IGNORE:E1101 - start = tb.UInt64Col() #IGNORE:E1101 - - def tdx_import_stock_name_from_file(connect, filename, market, quotations=None): """更新每只股票的名称、当前是否有效性、起始日期及结束日期 如果导入的代码表中不存在对应的代码,则认为该股已失效 @@ -51,7 +35,7 @@ def tdx_import_stock_name_from_file(connect, filename, market, quotations=None): :param connect: sqlite3实例 :param filename: 代码表文件名 :param market: 'SH' | 'SZ' - :param quotation: 待导入的行情类别,空为导入全部 'stock' | 'fund' | 'bond' | None + :param quotations: 待导入的行情类别列表,空为导入全部 'stock' | 'fund' | 'bond' | None """ cur = connect.cursor() @@ -133,17 +117,7 @@ def tdx_import_day_data_from_file(connect, filename, h5file, market, stock_recor stockid, marketid, code, valid, stktype = stock_record[0], stock_record[1], stock_record[2], stock_record[3],stock_record[4] - try: - group = h5file.get_node("/", "data") - except: - group = h5file.create_group("/", "data") - - tablename = market.upper() + code - try: - table = h5file.get_node(group, tablename) - except: - table = h5file.create_table(group, tablename, H5Record) - + table = get_h5table(h5file, market, code) if table.nrows > 0: startdate = table[0]['datetime']/10000 lastdatetime = table[-1]['datetime']/10000 @@ -214,17 +188,7 @@ def tdx_import_min_data_from_file(connect, filename, h5file, market, stock_recor stockid, marketid, code, valid, stktype = stock_record[0], stock_record[1], stock_record[2], stock_record[3],stock_record[4] - try: - group = h5file.get_node("/", "data") - except: - group = h5file.create_group("/", "data") - - tablename = market.upper() + code - try: - table = h5file.get_node(group, tablename) - except: - table = h5file.create_table(group, tablename, H5Record) - + table = get_h5table(h5file, market, code) if table.nrows > 0: lastdatetime = table[-1]['datetime'] else: @@ -325,9 +289,7 @@ def tdx_import_data(connect, market, ktype, quotations, src_dir, dest_dir, progr """ add_record_count = 0 market = market.upper() - filename = "{}_{}.h5".format(market, ktype) - filename = "{}/{}".format(dest_dir, filename.lower()) - h5file = tb.open_file(filename, "a", filters=tb.Filters(complevel=HDF5_COMPRESS_LEVEL, complib='zlib', shuffle=True)) + h5file = open_h5file(dest_dir, market, ktype) if ktype.upper() == "DAY": suffix = ".day" @@ -349,6 +311,9 @@ def tdx_import_data(connect, market, ktype, quotations, src_dir, dest_dir, progr total = len(a) for i, stock in enumerate(a): + if stock[3] == 0: + continue + filename = src_dir + "\\" + market.lower() + stock[2]+ suffix this_count = func_import_from_file(connect, filename, h5file, market, stock) add_record_count += this_count @@ -598,13 +563,14 @@ if __name__ == '__main__': src_dir = "D:\\TdxW_HuaTai" dest_dir = "c:\\stock" + quotations = ['stock', 'fund'] #通达信盘后数据没有债券 connect = sqlite3.connect(dest_dir + "\\stock.db") create_database(connect) print("导入股票代码表") - tdx_import_stock_name_from_file(connect, src_dir + "\\T0002\\hq_cache\\shm.tnf", 'SH', ['stock', 'fund']) - tdx_import_stock_name_from_file(connect, src_dir + "\\T0002\\hq_cache\\szm.tnf", 'SZ', ['stock', 'fund']) + tdx_import_stock_name_from_file(connect, src_dir + "\\T0002\\hq_cache\\shm.tnf", 'SH', quotations) + tdx_import_stock_name_from_file(connect, src_dir + "\\T0002\\hq_cache\\szm.tnf", 'SZ', quotations) print("\n导入上证日线数据") add_count = 0 diff --git a/hikyuu_python/tools/maintain/pytdx_to_hdf5.py b/hikyuu_python/tools/maintain/pytdx_to_hdf5.py new file mode 100644 index 00000000..eb88c9f6 --- /dev/null +++ b/hikyuu_python/tools/maintain/pytdx_to_hdf5.py @@ -0,0 +1,256 @@ +# coding:utf-8 +# +# The MIT License (MIT) +# +# Copyright (c) 2010-2017 fasiondog/hikyuu +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import sys +import math +import datetime +import sqlite3 +from pytdx.hq import TDXParams + +from common import get_stktype_list +from sqlite3_common import (get_codepre_list, create_database, + get_marketid, get_last_date, get_stock_list) +from hdf5_common import (H5Record, H5Index, + open_h5file, get_h5table) + + +def ProgressBar(cur, total): + percent = '{:.0%}'.format(cur / total) + sys.stdout.write('\r') + sys.stdout.write("[%-50s] %s" % ('=' * int(math.floor(cur * 50 / total)),percent)) + sys.stdout.flush() + + +def to_pytdx_market(market): + """转换为pytdx的market""" + pytdx_market = {'SH': TDXParams.MARKET_SH, 'SZ': TDXParams.MARKET_SZ} + return pytdx_market[market.upper()] + +pytdx_market = {'SH': TDXParams.MARKET_SH, 'SZ': TDXParams.MARKET_SZ} + + +def import_stock_name(connect, api, market, quotations=None): + """更新每只股票的名称、当前是否有效性、起始日期及结束日期 + 如果导入的代码表中不存在对应的代码,则认为该股已失效 + + :param connect: sqlite3实例 + :param api: pytdx接口,必须在函数外进行连接 + :param market: 'SH' | 'SZ' + :param quotations: 待导入的行情类别,空为导入全部 'stock' | 'fund' | 'bond' | None + """ + cur = connect.cursor() + + newStockDict = {} + pytdx_market = to_pytdx_market(market.upper()) + stk_count = api.get_security_count(pytdx_market) + + for i in range(int(stk_count/1000)+1): + stock_list = api.get_security_list(pytdx_market, i * 1000) + for stock in stock_list: + newStockDict[stock['code']] = stock['name'] + + marketid = get_marketid(connect, market) + + stktype_list = get_stktype_list(quotations) + a = cur.execute("select stockid, code, name, valid from stock where marketid={} and type in {}" + .format(marketid, stktype_list)) + a = a.fetchall() + oldStockDict = {} + for oldstock in a: + oldstockid, oldcode, oldname, oldvalid = oldstock[0], oldstock[1], oldstock[2], int(oldstock[3]) + oldStockDict[oldcode] = oldstockid + + # 新的代码表中无此股票,则置为无效 + if (oldvalid == 1) and (oldcode not in newStockDict): + cur.execute("update stock set valid=0 where stockid=%i" % oldstockid) + + # 股票名称发生变化,更新股票名称;如果原无效,则置为有效 + if oldcode in newStockDict: + if oldname != newStockDict[oldcode]: + cur.execute("update stock set name='%s' where stockid=%i" % + (newStockDict[oldcode], oldstockid)) + if oldvalid == 0: + cur.execute("update stock set valid=1, endDate=99999999 where stockid=%i" % oldstockid) + + # 处理新出现的股票 + codepre_list = get_codepre_list(connect, marketid, quotations) + + today = datetime.date.today() + today = today.year * 10000 + today.month * 100 + today.day + count = 0 + for code in newStockDict: + if code not in oldStockDict: + for codepre in codepre_list: + length = len(codepre[0]) + if code[:length] == codepre[0]: + count += 1 + #print(market, code, newStockDict[code], codepre) + sql = "insert into Stock(marketid, code, name, type, valid, startDate, endDate) \ + values (%s, '%s', '%s', %s, %s, %s, %s)" \ + % (marketid, code, newStockDict[code], codepre[1], 1, today, 99999999) + cur.execute(sql) + break + + #print('%s新增股票数:%i' % (market.upper(), count)) + connect.commit() + cur.close() + + +def import_one_stock_day_data(connect, api, h5file, market, code): + market = market.upper() + table = get_h5table(h5file, market, code) + if table.nrows > 0: + start_date = table[0]['datetime'] // 10000 + last_date = table[-1]['datetime'] // 10000 + else: + start_date = None + last_date = 19901219 + + today = datetime.date.today() + + last_y = last_date // 10000 + ny = today.year - last_y + days = (ny + 1) * 250 + step = 800 + + if days < step: + last_m = last_date // 100 - last_y * 100 + last_d = last_date - (last_y * 10000 + last_m * 100) + days = (today - datetime.date(int(last_y), int(last_m), int(last_d))).days + step = days + + print(code, days) + add_record_count = 0 + while days > 0: + x = api.get_security_bars(9, to_pytdx_market(market), '000001', days, step) + days -= step + if x is not None: + add_record_count += len(x) + + return add_record_count + + +def import_data(connect, market, ktype, quotations, api, dest_dir, progress=ProgressBar): + """导入通达信指定盘后数据路径中的K线数据。注:只导入基础信息数据库中存在的股票。 + + :param connect : sqlit3链接 + :param market : 'SH' | 'SZ' + :param ktype : 'DAY' | '1MIN' | '5MIN' + :param quotations: 'stock' | 'fund' | 'bond' + :param src_dir : 盘后K线数据路径,如上证5分钟线:D:\\Tdx\\vipdoc\\sh\\fzline + :param dest_dir : HDF5数据文件所在目录 + :param progress : 进度显示函数 + :return: 导入记录数 + """ + add_record_count = 0 + market = market.upper() + h5file = open_h5file(dest_dir, market, ktype) + + stock_list = get_stock_list(connect, market, quotations) + + total = len(stock_list) + for i, stock in enumerate(stock_list): + if stock[3] == 0: + print(stock[2], 'invalid!!!!') + continue + this_count = import_one_stock_day_data(connect, api, h5file, market, stock[2]) + add_record_count += this_count + if progress: + progress(i, total) + + connect.commit() + h5file.close() + return add_record_count + + +if __name__ == '__main__': + import time + starttime = time.time() + + dest_dir = "c:\\stock" + tdx_server = '119.147.212.81' + tdx_port = 7709 + quotations = ['stock', 'fund', 'bond'] + + connect = sqlite3.connect(dest_dir + "\\stock.db") + create_database(connect) + + from pytdx.hq import TdxHq_API, TDXParams + api = TdxHq_API() + api.connect(tdx_server, tdx_port) + + print("导入股票代码表") + #import_stock_name(connect, api, 'SH', quotations) + #import_stock_name(connect, api, 'SZ', quotations) + + print("\n导入上证日线数据") + add_count = 0 + add_count = import_data(connect, 'SH', 'DAY', ['stock'], api, dest_dir, progress=None) + print(add_count) + + """x = get_last_date(connect, 1) + print(x) + today = datetime.date.today() + y = x // 10000 + m = x // 100 - y * 100 + d = x - (y*10000+m*100) + ny = today.year - y + #days = (today - datetime.date(y, m, d)).days + days = (ny+1)*250 + i = 0 + x = [] + step = 800 + if days <= step: + days = (today - datetime.date(2018, 10, 29)).days + step =days + #print(days) + #x = api.get_security_bars(9, 0, '000001', 0, days) + #print(len(x)) + #print(x[0]) + #print(x[-1]) + while days > 0: + x += api.get_security_bars(9, 0, '000001', days, step) + days -= 800 + i+=1 + print(i ,len(x)) + + #for i in x: + # print(i) + + """ + #x = api.get_security_bars(9, 1, '000001', 6500, 800) + #print(len(x)) + #print(x[0]) + + #print(api.get_security_list(1, 0)) + + api.disconnect() + + connect.close() + + endtime = time.time() + print("\nTotal time:") + print("%.2fs" % (endtime - starttime)) + print("%.2fm" % ((endtime - starttime) / 60)) + diff --git a/hikyuu_python/tools/maintain/pytdximport.py b/hikyuu_python/tools/maintain/pytdximport.py deleted file mode 100644 index e207b873..00000000 --- a/hikyuu_python/tools/maintain/pytdximport.py +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf8 -*- -# cp936 - -import datetime -from pytdx.hq import TDXParams - -def to_pytdx_market(market): - """转换为pytdx的market""" - pytdx_market = {'SH': TDXParams.MARKET_SH, 'SZ': TDXParams.MARKET_SZ} - return pytdx_market[market.upper()] - -pytdx_market = {'SH': TDXParams.MARKET_SH, 'SZ': TDXParams.MARKET_SZ} - -def pytdx_get_weights(connect, market, code, lastdatetime=None): - xdxr_list = connect.get_xdxr_info(pytdx_market[market.upper()], code) - weight_dict = {} - for xdxr in xdxr_list: - this_date = xdxr['year'] * 100000000 + xdxr['month'] * 1000000 + xdxr['day'] * 10000 - if lastdatetime is not None and this_date <= lastdatetime: - continue - - if this_date in weight_dict: - pass - else: - weight = {'countAsGift' : 0, #每10股送X股 - 'countForSell': 0, #每10股配X股 - 'priceForSell': 0, #配股价 - 'bonus' : 0, #每10股红利 - 'increasement': 0, #每10股转增X股 - 'totalCount' : 0, #总股本(万股) - 'freeCount' : 0 #流通股(万股) - } - weight['bonus'] = 0 if xdxr['fenhong'] is None or xdxr['fenhong'] == 0 else int(round(xdxr['fenhong'], 3) * 1000) - - weight['freeCount'] = 0 if xdxr['panhouliutong'] is None or xdxr['panhouliutong'] == 0 else int(xdxr['panhouliutong']) - weight['totalCount'] = 0 if xdxr['houzongguben'] is None or xdxr['houzongguben'] == 0 else int(xdxr['houzongguben']) - - - -if __name__ == '__main__': - from pytdx.hq import TdxHq_API, TDXParams - api = TdxHq_API() - api.connect('119.147.212.81', 7709) - - market = to_pytdx_market('SZ') - stk_count = api.get_security_count(market) - print(stk_count, int(stk_count/1000)) - x = [] - for i in range(int(stk_count/1000)+1): - x += api.get_security_list(market, i * 1000) - - print(len(x)) - api.disconnect() \ No newline at end of file diff --git a/hikyuu_python/tools/maintain/sqlite3_baseinfo.py b/hikyuu_python/tools/maintain/sqlite3_common.py similarity index 79% rename from hikyuu_python/tools/maintain/sqlite3_baseinfo.py rename to hikyuu_python/tools/maintain/sqlite3_common.py index 12a68f37..26058cb2 100644 --- a/hikyuu_python/tools/maintain/sqlite3_baseinfo.py +++ b/hikyuu_python/tools/maintain/sqlite3_common.py @@ -27,6 +27,7 @@ import sqlite3 from common import MARKETID, get_stktype_list + def create_database(connect): """创建SQLITE3数据库表""" try: @@ -44,7 +45,7 @@ def create_database(connect): def get_marketid(connect, market): cur = connect.cursor() - a = cur.execute("select marketid, market from market where market='{}'".format(market)) + a = cur.execute("select marketid, market from market where market='{}'".format(market.upper())) marketid = [i for i in a] marketid = marketid[0][0] cur.close() @@ -71,3 +72,24 @@ def update_last_date(connect, marketid, lastdate): cur.execute("update LastDate set date={}".format(lastdate)) connect.commit() cur.close() + + +def get_last_date(connect, marketid): + cur = connect.cursor() + a = cur.execute("select lastDate from market where marketid='{}'".format(marketid)) + last_date = [x[0] for x in a][0] + connect.commit() + cur.close() + return last_date + + +def get_stock_list(connect, market, quotations): + marketid = get_marketid(connect, market) + stktype_list = get_stktype_list(quotations) + sql = "select stockid, marketid, code, valid, type from stock where marketid={} and type in {}"\ + .format(marketid, stktype_list) + cur = connect.cursor() + a = cur.execute(sql).fetchall() + connect.commit() + cur.close() + return a \ No newline at end of file