Motivated by a desire to explore latent correlations, we set out to investigate whether some significant commonality drives a particular group of stocks to move in step with each other. This article describes how to obtain the historical trading data of listed companies on the Shenzhen Stock Exchange, the Shanghai Stock Exchange, the STAR Market, and the Beijing Stock Exchange, normalize each stock's closing-price series, plot the normalized curves, and save them as images. We then train a variational autoencoder (VAE) on a representative subset of these images to extract a latent feature vector for each price-curve image. Next, we cluster the full set of feature vectors to reveal any structured groupings, identify the cluster whose members show the most similar trajectories, and analyze their fundamentals and market behavior in detail, in the hope of uncovering the common factors that drive these stocks to move together.
The paragraph above and the title were polished with Tongyi Qianwen.
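
As a preview of the normalization-and-plotting step described above, the following is a minimal sketch. It assumes each downloaded CSV has a close column (as produced by the download script later in this post); the function name, file paths, and figure size are illustrative, not the exact code used in this series.

import pandas as pd
import matplotlib.pyplot as plt

def save_normalized_curve(csv_path, image_path):
    # Min-max normalize the closing-price series to [0, 1] and save the curve as an image.
    df = pd.read_csv(csv_path)
    close = df["close"]
    norm = (close - close.min()) / (close.max() - close.min())
    plt.figure(figsize=(2.56, 2.56))
    plt.plot(norm.values, color="black", linewidth=1)
    plt.axis("off")  # keep only the curve shape, which is what the VAE will see
    plt.savefig(image_path)
    plt.close()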

This article is still being updated.

I. Version Information

II. Steps

1. Download historical trading data for each stock

Main steps:

  • Download the full list of stock codes via akshare
  • Use selenium to open Xueqiu (xueqiu.com), log in by scanning the QR code with your phone, and, once the login is confirmed, automatically download each stock's historical trading data and save it to a CSV file

A. Code (download_stocks.py)

import sys
import akshare as ak
from numpy import int64
import pandas as pd
import os
import codecs
import glob
import requests
import random
import time
import json
import datetime
import tqdm
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import TimeoutException

def createWebDrv():
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('ignore-certificate-errors')
    chrome_options.add_argument('--disable-gpu')
    chrome_options.add_argument('--start-maximized')
    chrome_options.add_argument('--incognito')
    chrome_options.add_argument('disable-cache')
    chrome_options.add_argument('disable-infobars')
    chrome_options.add_argument('log-level=3')
    chrome_options.add_argument('window-size=1920x917')
    driver = webdriver.Chrome(options=chrome_options)
    return driver

def download_stock_code():
    # Fetch the stock code/name lists of the SH main board, SZ, STAR Market and BSE via akshare
    # and cache them locally as CSV files.
    if not os.path.exists('sh.csv'):
        stock_sh = ak.stock_info_sh_name_code(symbol="主板A股")
        stock_sh = stock_sh[["证券代码", "证券简称"]]
        stock_sh.to_csv('sh.csv')

    if not os.path.exists('sz.csv'):
        stock_sz = ak.stock_info_sz_name_code(symbol="A股列表")
        stock_sz["A股代码"] = stock_sz["A股代码"].astype(str).str.zfill(6)
        stock_sz = stock_sz[["A股代码", "A股简称"]]
        stock_sz.to_csv('sz.csv')

    if not os.path.exists('kcb.csv'):
        stock_kcb = ak.stock_info_sh_name_code(symbol="科创板")
        stock_kcb = stock_kcb[["证券代码", "证券简称"]]
        stock_kcb.to_csv('kcb.csv')

    if not os.path.exists('bse.csv'):
        stock_bse = ak.stock_info_bj_name_code()
        stock_bse = stock_bse[["证券代码", "证券简称"]]
        stock_bse.to_csv('bse.csv')

def get_stock_list():
    # Read the cached CSV files and build Xueqiu-style symbols such as "SH600000-name".
    records = []
    loc_map = {'sh': 'SH', 'sz': 'SZ', 'kcb': 'SH', 'bse': 'BJ'}
    for path in glob.glob("*.csv"):
        loc = loc_map[path.split('.')[0]]
        with codecs.open(path, 'r', 'utf-8') as fp:
            for line in fp.readlines()[1:]:
                idx, code, name = line.strip().split(',')
                records.append("%s%s-%s" % (loc, code, name.replace(' ', '').replace('*', '')))
    return records

def get_stock_detail(driver, name, save_path, begin, end):
    # Fetch the daily k-line JSON of one stock from Xueqiu and save it as a CSV file.
    url = 'https://stock.xueqiu.com/v5/stock/chart/kline.json?symbol={}&begin={}&end={}&period=day&type=before&indicator=kline'.format(name, begin, end)
    print(url)
    try:
        driver.get(url)
        WebDriverWait(driver, 10, 0.5).until(lambda d: d.find_element(By.XPATH, "//pre"))
        json_element = driver.find_element(By.XPATH, "//pre")
        time.sleep(1 + random.random() * 3.2)  # random delay to avoid hitting the server too fast
        text = json_element.text
        data = json.loads(text)  # parse the JSON string
        item = data['data']['item']  # the 'item' field holds the k-line rows
        df = pd.DataFrame(item, columns=["timestamp", "volume", "open", "high", "low", "close", "chg", "percent", "turnoverrate", "amount", "volume_post", "amount_post"])  # convert the list to a DataFrame for easier processing later
        df.to_csv(save_path)
    except TimeoutException:  # WebDriverWait raises selenium's TimeoutException, not requests'
        print(name)
        print(url)

def gen_timestamp(time_str):
    # Convert a "YYYY-MM-DD HH:MM:SS.ffffff" string into the millisecond timestamp Xueqiu expects.
    dt = datetime.datetime.strptime(time_str, "%Y-%m-%d %H:%M:%S.%f")
    dt = dt + datetime.timedelta(hours=+8)  # offset for China Standard Time (UTC+8)
    return str(int64(dt.timestamp() * 1000))

def download_detail_all(driver, records):
    print("stock count:", len(records))
    begin = gen_timestamp('2022-12-05 23:00:00.000000')
    end = gen_timestamp('2024-02-28 23:00:00.000000')
    if not os.path.exists("./detail"):
        os.makedirs("./detail")

    for item in tqdm.tqdm(records):
        # The original listing is cut off at this point; the loop body below is a plausible
        # reconstruction based on the functions defined above.
        code = item.split('-')[0]  # e.g. "SH600000" from "SH600000-name"
        save_path = os.path.join("./detail", item + ".csv")
        if os.path.exists(save_path):
            continue  # skip stocks that were already downloaded
        get_stock_detail(driver, code, save_path, begin, end)
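
One possible way to wire these functions together is sketched below. The login URL and the manual pause for the QR-code scan are assumptions based on the steps listed above; the original post does not show its main routine.

if __name__ == '__main__':
    driver = createWebDrv()
    # Open Xueqiu and wait for the user to scan the QR code with the phone app.
    driver.get('https://xueqiu.com')
    input('Scan the QR code to log in, then press Enter to continue...')
    download_stock_code()          # cache the code/name lists of the four boards
    records = get_stock_list()     # e.g. ["SH600000-name", ...]
    download_detail_all(driver, records)
    driver.quit()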