admin 管理员组

文章数量: 887021

from pymongo import MongoClient
from requests_html import HTMLSession
import time
import random
from threading import Thread


# Single HTMLSession instance, created once at import time and kept at module level.
session = HTMLSession()
# Candidate HTTP request-header sets. The first entry is a full browser-like
# header set; the remaining entries only override the user agent.
# (The code that selects an entry is not part of this section.)
headers = [
    {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Connection': 'keep-alive',
        # NOTE(review): this host name has no top-level domain and looks
        # truncated -- confirm the intended value before relying on it.
        'Host': 'vip.stock.finance.sina',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
    },
    {
        'user-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
    },
    {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36',
    },
    {
        'user-agent': 'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:21.0) Gecko/20100101 Firefox/21.0',
    },
    {
        'user-agent': 'Mozilla/5.0 (iPad; CPU OS 5_0 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A334 Safari/7534.48.3',
    },
]
# stypes: maps the first three digits of a stock code to the code of the
# exchange it is listed on ('sh' or 'sz').
stypes = {
    '300': 'sz',
    '600': 'sh',
    '601': 'sh',
    '603': 'sh',
    '900': 'sh',
    '000': 'sz',
    '200': 'sz',
    '002': 'sz',
}
# wztypes: maps short category identifiers to their Chinese display names
# (presumably the news / announcement sections of the site -- confirm
# against the code that consumes this table).
wztypes = {
    'AllNewsStock': '个股资讯',
    'stockIndustryNews': '行业资讯',
    'FinManDiv': '理财师解读',
    'gzbc': '更正或补充',
    'gszc': '公司章程',
    'gqfzgg': '股权分置改革说明书',
    'hfbg': '回访报告',
    'lsgg': '临时公告',
    'ndbg': '年度报告',
    'ndbgzy': '年度报告(摘要)',
    'pgsms': '配股说明书',
    'qzssgg': '权证上市公告书',
    'qzsms': '权证说明书',
    'sjdbg': '三季度报告',
    'sjdbgzy': '三季度报告(摘要)',
    'ssggs': '上市公告书',
    'yjdbg': '一季度报告',
    'yjdbgzy': '一季度报告(摘要)',
    'zgsmssbg': '招股说明书(申报稿)',
    'zgsmsyxs': '招股说明书/意向书',
    'zqbg': '中期报告',
    'zqbgzy': '中期报告(摘要)',
}
def create_db():
    """Connect to the local MongoDB server and return the two collections.

    Opens a client against ``localhost:27017``, selects the ``sina_finance``
    database and hands back its ``basic`` and ``detail`` collections.

    Returns:
        A ``(basic_collection, detail_collection)`` tuple.
    """
    database = MongoClient('localhost', 27017).sina_finance
    return database.basic, database.detail

def get_basic_data(html,scode):
    """Extract the stock name from a page and upsert it into ``col_basic``.

    Args:
        html: Parsed page object exposing ``find(selector, first=True)``
            (presumably a requests_html ``HTML`` instance -- confirm with
            the caller).
        scode: Stock code; stored as the document ``_id``.

    Side effects:
        Writes ``{'_id': scode, 'sname': <name>}`` to the module-level
        ``col_basic`` collection, replacing any existing document with the
        same ``_id``.

    Raises:
        AttributeError: if the ``div.hq_title`` element or its ``h1`` child
            is not found (``find(..., first=True)`` returned ``None``).
    """
    heading = html.find('div.hq_title',first = True).find('h1',first = True).text
    # The heading text carries an 11-character suffix after the stock name
    # (presumably the stock code and exchange -- confirm); drop it.
    sname = heading[:-11]
    document = {'_id':scode,'sname':sname}
    # Collection.save() was deprecated in PyMongo 3.0 and removed in 4.0.
    # For a document that carries an _id, replace_one(..., upsert=True) is
    # the documented equivalent: insert if absent, replace otherwise.
    col_basic.replace_one({'_id':scode},document,upsert = True)

def exist_next_page(html):
    #判断当前页面是否存在下一页
    p = html.find("div[style = 'margin-top:10px;float:right;margin-right:100px;']",first = True)
    if p is None 

本文标签: 爬虫 新浪财经 python