hikyuu2/hikyuu/tools/fenge.py

140 lines
5.1 KiB
Python
Raw Normal View History

#!/usr/bin/python
# -*- coding: utf8 -*-
# cp936
import tables
import datetime
import sys
import math
def ProgressBar(cur, total):
    """Redraw a one-line text progress bar on standard output.

    The line is rewritten in place (it starts with a carriage return and has
    no trailing newline) and consists of a 50-character bar followed by the
    completion percentage with two decimals.

    Args:
        cur: Number of work items finished so far.
        total: Total number of work items. A value that is not positive is
            rendered as a completed bar instead of raising
            ``ZeroDivisionError``.
    """
    bar_width = 50

    if total > 0:
        # float() keeps the percentage correct under Python 2 as well, where
        # int / int would truncate to 0 until the very last item.
        ratio = float(cur) / total
        filled = int(math.floor(cur * bar_width / total))
    else:
        # Nothing to process: show the job as complete.
        ratio = 1.0
        filled = bar_width

    # Clamp so that an out-of-range ``cur`` can neither overflow the
    # fixed-width bar nor report a percentage outside 0%..100%.
    ratio = min(max(ratio, 0.0), 1.0)
    filled = min(max(filled, 0), bar_width)

    percent = '{:.2%}'.format(ratio)
    sys.stdout.write('\r')
    sys.stdout.write("[%-50s] %s" % ('=' * filled, percent))
    sys.stdout.flush()
class FiveMinDataRecordH5File(tables.IsDescription):
    """PyTables row description used for the tables this module writes.

    It is the description handed to the table-creation calls in ``fenge`` and
    ``fenge2``; both copy exactly these seven fields from each source row.
    The trailing ``#IGNORE:E1101`` markers are lint suppressions for the
    dynamically resolved ``tables`` column classes.
    """
    # Record timestamp; the rest of this file treats it as the integer
    # YYYYMMDDhhmm (e.g. 201301010000).
    datetime = tables.UInt64Col() #IGNORE:E1101
    # Open / high / low / close prices, stored as unsigned 32-bit integers.
    # NOTE(review): presumably scaled fixed-point values - the scale factor
    # is not visible in this file; confirm against the importer.
    openPrice = tables.UInt32Col() #IGNORE:E1101
    highPrice = tables.UInt32Col() #IGNORE:E1101
    lowPrice = tables.UInt32Col() #IGNORE:E1101
    closePrice = tables.UInt32Col() #IGNORE:E1101
    # Traded amount and traded count, stored as unsigned 64-bit integers
    # (units are not visible in this file).
    transAmount = tables.UInt64Col() #IGNORE:E1101
    transCount = tables.UInt64Col() #IGNORE:E1101
def fenge(src_file_name, dest_file_name, start_date, end_date):
    """Copy the records of one date range into a new HDF5 file.

    Splitting the raw data by date range keeps the individual files small
    enough to be synchronised / backed up conveniently.

    Every table found under ``/data`` in the source file is recreated, with
    the same name and title, under ``/data`` in the destination file. A row
    is copied when ``start_date <= datetime < end_date`` and both its open
    and close prices lie within ``[lowPrice, highPrice]``; other rows are
    dropped silently.

    Args:
        src_file_name: Path of the HDF5 file to read.
        dest_file_name: Path of the HDF5 file to create (opened with mode
            ``"w"``).
        start_date: Inclusive lower bound, an integer timestamp such as
            201601010000.
        end_date: Exclusive upper bound in the same encoding.
    """
    print("正在进行,请稍候.....")

    h5_filters = tables.Filters(complevel=9, complib='zlib', shuffle=True)
    field_names = ('datetime', 'openPrice', 'highPrice', 'lowPrice',
                   'closePrice', 'transAmount', 'transCount')

    # The context managers close both files even when copying fails, and the
    # snake_case names are the PyTables 3.x spelling (this function already
    # relied on 3.x through create_group).
    with tables.open_file(src_file_name, mode='r', filters=h5_filters) as src_hdf5, \
            tables.open_file(dest_file_name, mode='w', filters=h5_filters) as dest_hdf5:
        dest_group = dest_hdf5.create_group("/", "data")

        # Ask only for Table nodes so the "/data" group itself (the first
        # node yielded by the walk) never has to be skipped by index.
        all_table = list(src_hdf5.walk_nodes("/data", classname="Table"))
        total = len(all_table)

        for done, src_table in enumerate(all_table, 1):
            ProgressBar(done, total)
            dest_table = dest_hdf5.create_table(
                dest_group, src_table.name, FiveMinDataRecordH5File,
                src_table.name)
            dest_row = dest_table.row

            for x in src_table:
                # Stops at the first row at or past the upper bound; this
                # assumes rows are stored in ascending datetime order.
                if x['datetime'] >= end_date:
                    break
                if start_date <= x['datetime'] \
                        and x['lowPrice'] <= x['openPrice'] <= x['highPrice'] \
                        and x['lowPrice'] <= x['closePrice'] <= x['highPrice']:
                    for name in field_names:
                        dest_row[name] = x[name]
                    dest_row.append()

            dest_table.flush()
def fenge2(src_file_name, dest_dir_name):
    """Split the data of every stock into its own HDF5 file.

    For each table found in the source file a new file is created in
    ``dest_dir_name`` holding a single table ``/data`` whose title is the
    source table name. A row is copied only when

    * its timestamp is strictly greater than the last copied timestamp,
    * the timestamp decodes to a valid calendar date, and
    * its open and close prices lie within ``[lowPrice, highPrice]``.

    Rejected rows are reported on standard output and skipped.

    NOTE(review): the original body used ``dest_hdf5`` without ever opening
    it and ignored ``dest_dir_name``. The ``<table name>.h5`` file naming
    below is the assumed intent - confirm before relying on it.

    Args:
        src_file_name: Path of the HDF5 file to read.
        dest_dir_name: Existing directory that receives one ``.h5`` file per
            source table.
    """
    import os  # function-scope import: the module header does not import os

    h5_filters = tables.Filters(complevel=9, complib='zlib', shuffle=True)
    field_names = ('datetime', 'openPrice', 'highPrice', 'lowPrice',
                   'closePrice', 'transAmount', 'transCount')

    with tables.open_file(src_file_name, mode='r', filters=h5_filters) as src_hdf5:
        # Only Table nodes are wanted; groups yielded by the walk are skipped.
        for src_table in src_hdf5.walk_nodes("/", classname="Table"):
            dest_file_name = os.path.join(dest_dir_name, src_table.name + ".h5")
            with tables.open_file(dest_file_name, mode='w', filters=h5_filters) as dest_hdf5:
                dest_table = dest_hdf5.create_table(
                    "/", "data", FiveMinDataRecordH5File, src_table.name)
                dest_row = dest_table.row
                pre_date = 0

                for x in src_table:
                    # Plain int: keeps the // arithmetic exact and acceptable
                    # to datetime.date on Python 3.
                    stamp = int(x['datetime'])

                    if stamp <= pre_date:
                        # Duplicate or out-of-order record.
                        print(src_table, pre_date, stamp)
                        continue

                    try:
                        # Constructed purely to validate YYYYMMDD of the
                        # YYYYMMDDhhmm timestamp; the value is not needed.
                        datetime.date(stamp // 100000000,
                                      stamp // 1000000 % 100,
                                      stamp // 10000 % 100)
                    except (ValueError, OverflowError):
                        print(src_table, stamp)
                        continue

                    if not (x['lowPrice'] <= x['openPrice'] <= x['highPrice']
                            and x['lowPrice'] <= x['closePrice'] <= x['highPrice']):
                        print(src_table, stamp, "warning 2")
                        continue

                    for name in field_names:
                        dest_row[name] = x[name]
                    dest_row.append()
                    # Advance only on accepted rows so one corrupt timestamp
                    # cannot cause every later row to be rejected.
                    pre_date = stamp

                dest_table.flush()
if __name__ == "__main__":
    import time

    started_at = time.time()

    # Alternative run: split a day-data file into one file per stock, e.g.
    #   fenge2('/home/fasiondog/workspace/hikyuu/test/data/sz_day.h5',
    #          "/home/fasiondog/workspace/hikyuu/test/data/sz/day/")

    # Hard-coded job description: which market / bar size / year to extract.
    src_dir = 'c:\\stock'
    dst_dir = u'D:\\快盘\\80.stock\\stock'
    data_type = '5min'  # only '5min' or '1min'
    market = 'sz'
    year = 2016

    # <src_dir>\<market>_<data_type>.h5
    src_file_name = "\\".join([src_dir, market + '_' + data_type + '.h5'])
    # <dst_dir>\<market>\<data_type>\<year>.h5
    dest_file_name = "\\".join([dst_dir, market, data_type, str(year) + ".h5"])

    # Timestamps are integers; the range runs from <year>01010000 up to, but
    # not including, <year + 1>01010000.
    start_date = year * 100000000 + 1010000
    end_date = start_date + 100000000

    fenge(src_file_name, dest_file_name, start_date, end_date)

    elapsed = time.time() - started_at
    print("\n")
    print("%.2fs" % elapsed)
    print("%.2fm" % (elapsed / 60))