|
|
@@ -0,0 +1,259 @@
|
|
|
+import os
|
|
|
+import struct
|
|
|
+import pandas as pd
|
|
|
+
|
|
|
+from datetime import datetime
|
|
|
+from pathlib import Path
|
|
|
+from struct import pack, unpack
|
|
|
+from collections import defaultdict
|
|
|
+
|
|
|
+from config import config
|
|
|
+from chive.service import MessageThread
|
|
|
+from chive.file_watcher import HandlerBase, FileWatcher, WatcherMessageHandler, WatcherZookeeperHandler
|
|
|
+
|
|
|
+
|
|
|
# Folder (relative to the TDX main folder) holding the custom-block ``.blk``
# files, and the name of the index file stored next to them.
BLOCK_FOLDER = r"T0002\blocknew"
BLOCK_CFG_FILE = "blocknew.cfg"

# Cache folder name and the data files read by TDXClient.
CACHE_FOLDER = r"T0002\hq_cache"
HY_CFG_FILE = "tdxhy.cfg"
SZM_FILE = "szm.tnf"
SHM_FILE = "shm.tnf"
|
|
|
+
|
|
|
+
|
|
|
class TDXConceptsHandler(HandlerBase):
    """Watcher handler that reads each reported file as GBK text.

    NOTE(review): the decoded text is not used yet, so dispatching currently
    has no effect beyond reading the files -- confirm what should be done
    with the content.
    """

    _name = 'tdx.concepts.handler'

    def dispatch(self, name, file_names):
        """Read every path in ``file_names`` as GBK, ignoring undecodable bytes.

        ``name`` is accepted but not used.
        """
        for changed_file in file_names:
            # Result intentionally kept in a local; nothing consumes it yet.
            content = Path(changed_file).read_text(encoding='gbk', errors='ignore')
|
|
|
+
|
|
|
+
|
|
|
+# 负责读取本地通达信数据
|
|
|
+class TDXClient(MessageThread):
|
|
|
+ def __init__(self, tdx_main_folder, pulsar_client=None, zk_client=None):
|
|
|
+ super().__init__(pulsar_client=pulsar_client, zk_client=zk_client)
|
|
|
+ self.main_path = Path(tdx_main_folder)
|
|
|
+ self.block_path = Path(self.resolve(BLOCK_FOLDER))
|
|
|
+ self.__base_all = None
|
|
|
+
|
|
|
+ def resolve(self, folder):
|
|
|
+ return str(self.main_path / folder)
|
|
|
+
|
|
|
+ def get_block_stocks(self, block_name=None):
|
|
|
+ now = datetime.now()
|
|
|
+ block_name = block_name or now.strftime('%y%m%d')
|
|
|
+ block_path = self.block_path / f'{block_name}.blk'
|
|
|
+ if block_path.exists():
|
|
|
+ block_stocks = [s for s in block_path.read_text().split('\n') if s.strip()]
|
|
|
+ return [f'{stock_code[1:]}.SZ' if stock_code[0] == '0' else f'{stock_code[1:]}.SH'
|
|
|
+ for stock_code in block_stocks]
|
|
|
+
|
|
|
+ # region custom block
|
|
|
+ def get_cust_blocks(self):
|
|
|
+ blocks = {}
|
|
|
+
|
|
|
+ cfg_file = self.block_path / BLOCK_CFG_FILE
|
|
|
+ if not cfg_file.exists():
|
|
|
+ return blocks
|
|
|
+ buff = cfg_file.read_bytes()
|
|
|
+ count = int(len(buff) / 120)
|
|
|
+
|
|
|
+ buff_blocks = [buff[i * 120:i * 120 + 120] for i in range(count)]
|
|
|
+ for block in buff_blocks:
|
|
|
+ name = block[:50].decode('gbk', errors="ignore").replace('\x00', '')
|
|
|
+ display_name = block[50:].decode('gbk', errors="ignore").replace('\x00', '')
|
|
|
+ blocks[name] = display_name
|
|
|
+ return blocks
|
|
|
+
|
|
|
+ def save_cust_block(self, blocks: dict, replace=True):
|
|
|
+ file_blocks = self.get_cust_blocks()
|
|
|
+ for key, stocks in blocks.items():
|
|
|
+ name, display_name = key
|
|
|
+ block_path = (self.block_path / f'{name}.blk')
|
|
|
+ if not replace:
|
|
|
+ if block_path.exists():
|
|
|
+ file_stocks = block_path.read_text().split('\n')
|
|
|
+ stocks = file_stocks + [s for s in stocks if s and s not in file_stocks]
|
|
|
+ block_path.write_text('\n'.join(stocks))
|
|
|
+ file_blocks[name] = display_name
|
|
|
+
|
|
|
+ buff = b''
|
|
|
+ for block in file_blocks:
|
|
|
+ buff += pack(f'>{50}s', block.encode('gbk'))
|
|
|
+ buff += pack(f'>{70}s', file_blocks[block].encode('gbk'))
|
|
|
+
|
|
|
+ cfg_file = self.block_path / BLOCK_CFG_FILE
|
|
|
+ cfg_file.write_bytes(buff)
|
|
|
+ # endregion
|
|
|
+
|
|
|
+ # 获取行业数据
|
|
|
+ def _get_stock_industry(self):
|
|
|
+ tdxhy_path = self.resolve(HY_CFG_FILE)
|
|
|
+ names = "market symbol industry_code idontcare1 idontcare2 industry_second_code".split()
|
|
|
+ usecols = "market symbol industry_code industry_second_code".split()
|
|
|
+ tdxhy = pd.read_csv(
|
|
|
+ tdxhy_path,
|
|
|
+ sep="|",
|
|
|
+ names=names,
|
|
|
+ usecols=usecols,
|
|
|
+ dtype={"symbol": str},
|
|
|
+ )
|
|
|
+ return tdxhy
|
|
|
+
|
|
|
+ # 行业代码码值
|
|
|
+ def _get_industry(self):
|
|
|
+ incon_path = self.resolve(r"incon.dat")
|
|
|
+ incon = pd.read_csv(incon_path, encoding="gb2312", names=["index"])
|
|
|
+ fx0 = lambda x: x.split("|")[0] if "|" in x else ""
|
|
|
+ incon["industry_code"] = incon["index"].apply(fx0)
|
|
|
+ fx1 = lambda x: x.split("|")[1] if "|" in x else ""
|
|
|
+ incon["industry_name"] = incon["index"].apply(fx1)
|
|
|
+ usecols = ["industry_code", "industry_name"]
|
|
|
+ return incon[usecols]
|
|
|
+
|
|
|
+ # 股票代码对应拼音缩写
|
|
|
+ def _read_tnf(self, path):
|
|
|
+ market = path.split(".")[0][-3:]
|
|
|
+ with open(path, "rb") as f:
|
|
|
+ buff = f.read()
|
|
|
+
|
|
|
+ data = buff[50:]
|
|
|
+ l = len(data) // 314
|
|
|
+ fx = lambda x: str(x, encoding="gbk").strip("\x00")
|
|
|
+ sm = {"szm": ("00", "30"), "shm": ("60", "68")}
|
|
|
+
|
|
|
+ stocks = []
|
|
|
+ for x in [data[i * 314 : (i + 1) * 314] for i in range(l)]:
|
|
|
+ code = fx(x[:6])
|
|
|
+ if code.startswith(sm[market]):
|
|
|
+ name = fx(x[23:41])
|
|
|
+ shortcode = fx(x[285:293])
|
|
|
+
|
|
|
+ stocks += [[code, name, shortcode]]
|
|
|
+ return stocks
|
|
|
+
|
|
|
    # Stock symbols, names and short codes read from the .tnf files
|
|
|
+ def _get_stock_names(self):
|
|
|
+ szm_path = self.resolve(SZM_FILE)
|
|
|
+ shm_path = self.resolve(SHM_FILE)
|
|
|
+
|
|
|
+ szm = self._read_tnf(szm_path)
|
|
|
+ shm = self._read_tnf(shm_path)
|
|
|
+
|
|
|
+ stocks = pd.DataFrame(szm + shm, columns=["symbol", "name", "shortcode"])
|
|
|
+ return stocks
|
|
|
+
|
|
|
+ # 整合基本数据
|
|
|
+ def _get_base_all(self):
|
|
|
+ stock_industry = self._get_stock_industry()
|
|
|
+ industry_name = self._get_industry()
|
|
|
+ stock_name = self._get_stock_names()
|
|
|
+
|
|
|
+ base = pd.merge(stock_name, stock_industry, how="left", on="symbol")
|
|
|
+ base = pd.merge(base, industry_name, how="left", on="industry_code")
|
|
|
+ base = pd.merge(
|
|
|
+ base,
|
|
|
+ industry_name,
|
|
|
+ how="left",
|
|
|
+ left_on="industry_second_code",
|
|
|
+ right_on="industry_code",
|
|
|
+ )
|
|
|
+
|
|
|
+ fx = lambda x: ".sh" if x else ".sz"
|
|
|
+ base["ts_code"] = base["symbol"] + base["market"].apply(fx)
|
|
|
+
|
|
|
+ base.rename(
|
|
|
+ columns={
|
|
|
+ "industry_name_x": "industry_name",
|
|
|
+ "industry_name_y": "industry_detail",
|
|
|
+ "industry_code_x": "industry_code",
|
|
|
+ },
|
|
|
+ inplace=True,
|
|
|
+ )
|
|
|
+ # base = base.drop(['industry_code_y'], axis=1)
|
|
|
+ usecols = "ts_code symbol name shortcode industry_name industry_detail".split()
|
|
|
+ return base[usecols]
|
|
|
+
|
|
|
+ def get_block_file(self, block='gn'):
|
|
|
+ file_name = f'T0002/hq_cache/block_{block}.dat'
|
|
|
+ file_path = self.resolve(file_name)
|
|
|
+ with open(file_path, 'rb') as f:
|
|
|
+ buff = f.read()
|
|
|
+
|
|
|
+ head = unpack('<384sh', buff[:386])
|
|
|
+ blk = buff[386:]
|
|
|
+ blocks = [blk[i * 2813:(i + 1) * 2813] for i in range(head[1])]
|
|
|
+ bk_list = []
|
|
|
+ for bk in blocks:
|
|
|
+ name = bk[:8].decode('gbk', 'ignore').strip('\x00')
|
|
|
+ num, t = unpack('<2h', bk[9:13])
|
|
|
+ stks = bk[13:(12 + 7 * num)].decode('gbk', 'ignore').split('\x00')
|
|
|
+ bk_list.append([name, block, num, stks])
|
|
|
+ # return pd.DataFrame(bk_list, columns=['name', 'tp', 'num', 'stocks'])
|
|
|
+ return bk_list
|
|
|
+
|
|
|
+ def _read_concepts(self, content):
|
|
|
+ concepts = {}
|
|
|
+ for line in content.split('\n'):
|
|
|
+ row = line.split(',')
|
|
|
+ if len(row) < 4:
|
|
|
+ continue
|
|
|
+ concept_code, concept_name, stock_code, stock_name = line.split(',')
|
|
|
+ concepts.setdefault(concept_code, {'name': concept_name, 'stocks': []})
|
|
|
+ concepts[concept_code]['stocks'].append([stock_code, stock_name])
|
|
|
+ return concepts
|
|
|
+
|
|
|
+ def get_export_concepts(self):
|
|
|
+ file_name = u'T0002/export/概念板块.txt'
|
|
|
+ file_path = Path(self.resolve(file_name))
|
|
|
+ if file_path.exists():
|
|
|
+ content = file_path.read_text(encoding='gbk', errors='ignore')
|
|
|
+ return self._read_concepts(content)
|
|
|
+
|
|
|
+ # 读取K线源文件
|
|
|
+ def _read_kline(self, filepath):
|
|
|
+ with open(filepath, "rb") as f:
|
|
|
+ usecols = "trade_date open high low close amount vol openinterest".split()
|
|
|
+ buffers = []
|
|
|
+ while True:
|
|
|
+ buffer = f.read(32)
|
|
|
+ if not buffer:
|
|
|
+ break
|
|
|
+ buffer = struct.unpack("lllllfll", buffer)
|
|
|
+ buffers.append(buffer)
|
|
|
+ kline = pd.DataFrame(buffers, columns=usecols)
|
|
|
+
|
|
|
+ kline["trade_date"] = kline["trade_date"].astype(str)
|
|
|
+
|
|
|
+ price_columns = ["open", "high", "low", "close"]
|
|
|
+ kline[price_columns] = kline[price_columns].apply(lambda x: x / 100)
|
|
|
+ return kline
|
|
|
+
|
|
|
+ # 获取基本数据
|
|
|
+ def get_base_all(self):
|
|
|
+ if not self.__base_all:
|
|
|
+ self.__base_all = self._get_base_all()
|
|
|
+ return self.__base_all
|
|
|
+
|
|
|
+ # 获取日K线数据
|
|
|
+ def get_kline_daily(self, ts_code):
|
|
|
+ filename = ts_code.split(".")[1] + ts_code.split(".")[0] + ".day"
|
|
|
+ filepath = self.resolve('/'.join(["vipdoc", ts_code.split(".")[1], "lday", filename]))
|
|
|
+ kline = self._read_kline(filepath)
|
|
|
+ kline["ts_code"] = ts_code
|
|
|
+ kline.index = pd.to_datetime(kline["trade_date"])
|
|
|
+ kline.index.name = "index"
|
|
|
+ kline = kline.rename(columns={"vol": "volume"})
|
|
|
+ usecols = (
|
|
|
+ "ts_code trade_date open high low close amount volume openinterest".split()
|
|
|
+ )
|
|
|
+ return kline[usecols]
|
|
|
+
|
|
|
+ def start(self):
|
|
|
+ super().start()
|
|
|
+ if self.pulsar_client:
|
|
|
+ handlers = [WatcherMessageHandler(pulsar_client=self.pulsar_client)]
|
|
|
+ watcher = FileWatcher(self.main_path, handlers)
|
|
|
+ watcher.add_file_to_watch('tdx_concept_block', [HY_CFG_FILE, SZM_FILE, SHM_FILE])
|