py

空扰寡人 提交于 2019-12-05 05:28:49

大神

# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup
import lxml
import json
import re
import time
import tushare as ts
import pandas as pd
import pymysql

class Mysqldb:
    """Thin wrapper around a DB-API connection and one cursor created from it.

    Args:
        conn: An open DB-API connection (for example from ``pymysql.connect``).
        sql: Optional default statement used by :meth:`execute` when it is
            called without an explicit statement.
    """

    def __init__(self, conn, sql=None):
        self.conn = conn
        # Create the cursor from the connection instead of relying on a
        # module-level ``cursor`` name that may not exist.
        self.cursor = conn.cursor()
        self.sql = sql

    def execute(self, sql=None, args=None):
        """Execute a statement, rolling back the transaction on failure.

        Args:
            sql: Statement to run; falls back to the default statement given
                to the constructor when omitted.
            args: Optional parameters bound to the statement's placeholders
                by the driver.

        Returns:
            True if the statement ran, False if it failed and the
            transaction was rolled back.
        """
        statement = self.sql if sql is None else sql
        try:
            self.cursor.execute(statement, args)
        except Exception:
            # Best-effort semantics are kept: report the failure and undo
            # the pending transaction instead of propagating the error.
            self.conn.rollback()
            print("SQL执行失败,数据已回滚")
            return False
        return True

    def commit(self):
        """Commit the current transaction on the wrapped connection."""
        self.conn.commit()

    def close(self):
        """Close the cursor first, then the connection."""
        self.cursor.close()
        self.conn.close()

def Tstockbasic():
    """Fetch listed-stock basics from tushare and return the rows not yet stored.

    Relies on two module-level names that this function does not create:
    ``pro`` (a tushare pro API client) and ``conn`` (an open database
    connection).

    NOTE(review): the original body was unfinished (a stray bare name, a loop
    over an undefined ``df`` and a query using an undefined ``sql``).
    Presumably the intent was to find stocks whose ``ts_code`` is not yet in
    ``stkbasic_data`` -- confirm before relying on the return value.

    Returns:
        A DataFrame holding the rows of the tushare result whose ``ts_code``
        does not appear in the ``stkbasic_data`` table.
    """
    # Call stock_basic to get the currently listed stocks.
    stkbasic = pro.stock_basic(list_status='L', fields='ts_code,symbol,name,industry,list_date,exchange')

    # ``index`` is not a read_sql keyword, so the original call raised TypeError.
    old_stk = pd.read_sql("select ts_code from stkbasic_data", con=conn)
    known_codes = set(old_stk['ts_code'])

    return stkbasic[~stkbasic['ts_code'].isin(known_codes)]

def EMydSpider(current):
    """Scrape Eastmoney per-stock change events and insert them into ``emyd_data``.

    Source page: http://quote.eastmoney.com/changes

    Pages are requested one by one (at most 200) until the response carries
    no ``data``. Every entry of ``data['allstock']`` becomes one row built
    from its ``c``, ``n``, ``tm``, ``t`` and ``i`` fields. All rows are
    committed together at the end; on any error the transaction is rolled
    back and the error is re-raised.

    Args:
        current: Value stored in the ``current`` column of every row (the
            script passes today's date formatted as ``YYYYMMDD``).
    """
    header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (Khtml, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3676.400 QQBrowser/10.5.3738.400"}
    url = "http://push2ex.eastmoney.com/getAllStockChanges?type=8201,8202,8193,4,32,64,8207,8209,8211,8213,8215,8204,8203,8194,8,16,128,8208,8210,8212,8214,8216"
    # Placeholders are bound by the driver, which quotes and escapes the
    # scraped values; formatting them into the statement text would not.
    insert_sql = "insert into emyd_data(current,stk_code,stk_name,chg_time,chg_type,chg_value) values(%s,%s,%s,%s,%s,%s)"

    # pymysql.connect has no ``dbname`` keyword; the schema is ``database``.
    conn = pymysql.connect(host='127.0.0.1', port=3306, user='root', password='123456',
                           database='quantitative_trading_data', charset='utf8')
    try:
        with conn.cursor() as cursor:
            for page in range(200):
                param = {"pageindex": page, "pagesize": '64', "ut": '7eea3edcaed734bea9cbfc24409ed989', "dpt": 'wzchanges'}
                # A timeout keeps a stalled request from hanging the whole run.
                response = requests.get(url=url, params=param, headers=header, timeout=10)
                payload = json.loads(response.text)
                data = payload.get('data')
                if data is None:
                    print("东方财富:共爬取%d页数据" % page)
                    print("东方财富:个股盘口异动数据已抓取完成")
                    break

                rows = [
                    (current, stock['c'], stock['n'], stock['tm'], stock['t'], stock['i'])
                    for stock in data.get('allstock') or []
                ]
                if rows:
                    cursor.executemany(insert_sql, rows)
        conn.commit()
    except Exception:
        # Leave no partial page set behind, then let the caller see the error.
        conn.rollback()
        raise
    finally:
        conn.close()

def TXfjSpider(current, code_list):
    """Scrape Tencent's daily price-distribution data and insert it into ``txfj_data``.

    For each code the response text is stripped of its matched prefix and
    suffix, split on ``^`` into records, and each record is split on ``~``;
    field 0 is stored as ``price`` and field 2 as ``volumn``. Codes whose
    response does not match the expected wrapper, and records with fewer
    than three fields, are skipped. All rows are committed together at the
    end; on any error the transaction is rolled back and the error re-raised.

    Args:
        current: Value stored in the ``current`` column of every row.
        code_list: Iterable of stock-code strings appended to the query URL.
    """
    header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3676.400 QQBrowser/10.5.3738.400"}
    # Placeholders are bound by the driver instead of being formatted into
    # the statement text, so scraped values are quoted and escaped.
    insert_sql = "insert into txfj_data(current,stk_code,price,volumn) values(%s,%s,%s,%s)"
    # Same patterns as before, compiled once and written as raw strings.
    start_re = re.compile(r'v\wp.{10}\d+\,\d+\,\d+\,\"')
    end_re = re.compile(r'\"\W+')

    # pymysql.connect has no ``dbname`` keyword; the schema is ``database``.
    conn = pymysql.connect(host='127.0.0.1', port=3306, user='root', password='123456',
                           database='quantitative_trading_data', charset='utf8')
    try:
        with conn.cursor() as cursor:
            for code in code_list:
                url = "http://stock.gtimg.cn/data/index.php?appn=price&c=" + code
                # A timeout keeps a stalled request from hanging the whole run.
                html = requests.get(url=url, headers=header, timeout=10).text

                start = start_re.search(html)
                end = end_re.search(html)
                if start is None or end is None:
                    # Unexpected response shape for this code: nothing to parse.
                    continue

                body = html.replace(start.group(), '').replace(end.group(), '')
                rows = []
                for fj in body.split('^'):
                    fields = fj.split('~')
                    if len(fields) < 3:
                        continue
                    rows.append((current, code, fields[0], fields[2]))
                if rows:
                    cursor.executemany(insert_sql, rows)
        conn.commit()
    except Exception:
        # Leave no partial data behind, then let the caller see the error.
        conn.rollback()
        raise
    finally:
        conn.close()

def TXddSpider(current, code_list):
    """Scrape Tencent's daily large-order data (over 1,000,000) into ``txdd_data``.

    For every page index in ``range(50)`` and every code, the response text
    is stripped of its matched prefix and suffix, split on ``^`` into
    records, and each record is split on ``~``; field 0 is stored as
    ``ddprice`` and field 2 as ``volumn``. Responses that do not match the
    expected wrapper, and records with fewer than three fields, are skipped.
    All rows are committed together at the end; on any error the transaction
    is rolled back and the error re-raised.

    Args:
        current: Value stored in the ``current`` column of every row.
        code_list: Iterable of stock-code strings sent as the ``c`` query
            parameter.
    """
    header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3676.400 QQBrowser/10.5.3738.400"}
    url = "http://stock.finance.qq.com/sstock/list/view/dadan.php?"
    # Placeholders are bound by the driver instead of being formatted into
    # the statement text, so scraped values are quoted and escaped.
    insert_sql = "insert into txdd_data(current,stk_code,ddprice,volumn) values(%s,%s,%s,%s)"
    # Equivalent to the original patterns, compiled once as raw strings.
    start_re = re.compile(r"var v_dadan_data_.{12}'")
    end_re = re.compile(r"'\];")

    # pymysql.connect has no ``dbname`` keyword; the schema is ``database``.
    conn = pymysql.connect(host='127.0.0.1', port=3306, user='root', password='123456',
                           database='quantitative_trading_data', charset='utf8')
    try:
        with conn.cursor() as cursor:
            for page in range(50):
                for code in code_list:
                    param = {"t": "js", "c": code, "max": 80, "p": page, "opt": 10, "o": 0}
                    # The query parameters were built but never sent before,
                    # so every request ignored both the code and the page.
                    html = requests.get(url=url, params=param, headers=header, timeout=10).text

                    start = start_re.search(html)
                    end = end_re.search(html)
                    if start is None or end is None:
                        # No data wrapper in this response: nothing to parse.
                        continue

                    body = html.replace(start.group(), '').replace(end.group(), '')
                    rows = []
                    for dd in body.split('^'):
                        fields = dd.split('~')
                        if len(fields) < 3:
                            continue
                        rows.append((current, code, fields[0], fields[2]))
                    if rows:
                        cursor.executemany(insert_sql, rows)
        conn.commit()
    except Exception:
        # Leave no partial data behind, then let the caller see the error.
        conn.rollback()
        raise
    finally:
        conn.close()

if __name__ == "__main__":
    # Date stamp (YYYYMMDD, local time) written with every scraped row.
    current = time.strftime("%Y%m%d", time.localtime())
    # pymysql.connect takes keyword arguments, not a SQLAlchemy-style URL;
    # a URL passed positionally would be treated as the host name.
    conn = pymysql.connect(host="127.0.0.1", port=3306, user="root", password="123456",
                           database="quantitative_trading_data", charset="utf8")
    cursor = conn.cursor()
    # NOTE(review): code_list was never defined in the original script. This
    # single sample entry is taken from the original trailing comment
    # (SZ300033); the lowercase form is an assumption -- confirm the casing
    # and replace with the real list of codes to scrape.
    code_list = ["sz300033"]
    try:
        EMydSpider(current)
        TXfjSpider(current, code_list)
        TXddSpider(current, code_list)
    finally:
        # Release the module-level cursor and connection even if a spider fails.
        cursor.close()
        conn.close()

 

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!