- 微信
- 微博
  
  分享文章到微博
- 复制链接
  
  复制链接到剪贴板

Selenium自动下载qbt数据

诡途发表于 2021/11/19 01:17:21 2021/11/19

【摘要】 Selenium模拟浏览器自动下载数据。一开始使用固定休眠时间时，下载几个文件就会被踢下线；改为随机休眠后基本没有问题了。 from selenium import webdriver import time,os,...

Selenium模拟浏览器自动下载数据。
一开始使用固定休眠时间时，下载几个文件就会被踢下线；
改为随机休眠后基本没有问题了。

from selenium import webdriver
import time,os,shutil
import random
import pandas as pd
import numpy as np

def _random_pause(low, high):
    """Sleep for a random duration drawn uniformly from [low, high] seconds."""
    time.sleep(random.uniform(low, high))


def bitalk_log(username,password,date_xl,tmp_path,data_path):
    """Log in to the qbt site and download per-day CSV exports for each store.

    The function drives a Chrome browser: it logs in, opens the store
    analysis list, asks on stdin which store row to start from, and then,
    for every store row from that index on and every date in
    ``date_xl["日期"]``, runs a single-day search, clicks the CSV download
    button, waits for the download to finish and moves the file into
    ``data_path`` named ``<date><shop>.xls``.

    Pauses between actions use random durations; the accompanying article
    notes that fixed sleeps got the session kicked offline.

    Args:
        username: Login name typed into the login form.
        password: Password typed into the login form.
        date_xl: Table with a "日期" column; each value is converted to
            ``str`` and typed into both the start and end date fields.
        tmp_path: Directory Chrome downloads into.
            NOTE(review): the code takes the first directory entry as the
            downloaded file, so this presumably must be empty before the
            run -- confirm.
        data_path: Directory the renamed files are moved to.

    Returns:
        None. Any exception is printed rather than raised.
    """
    drive = None
    try:
        chrome_options = webdriver.ChromeOptions()
        # Load the browser extension; it has to be packed as a .crx file
        # with Chrome's developer tools first.
        extension_path = r"F:\JupyterNotebook\xxx.crx"
        chrome_options.add_extension(extension_path)

        # download.default_directory: where downloads are saved.
        # profile.default_content_settings.popups: 0 disables pop-up windows.
        prefs = {'profile.default_content_settings.popups': 0, 'download.default_directory':tmp_path}
        chrome_options.add_experimental_option('prefs', prefs)

        # Start the browser and open the login page.
        drive = webdriver.Chrome(chrome_options=chrome_options)
        url = 'http://qbt.ecdataway.com/shop'
        drive.get(url)
        _random_pause(2, 4)

        # Fill in the credentials and submit the login form.
        user = drive.find_element_by_name("LoginForm[username]")
        user.send_keys(username)
        pwd = drive.find_element_by_id("LoginForm_password")
        pwd.send_keys(password)
        drive.find_element_by_class_name("login_btn").click()
        # Give the landing page time to load after login.
        _random_pause(4, 6)

        drive.find_element_by_xpath('//ul/li/a[text()="店铺分析"]').click()
        _random_pause(4, 6)

        # One table row per store.
        links = drive.find_element_by_tag_name("tbody").find_elements_by_tag_name("tr")
        shop_num = len(links)
        print("店铺数量 %s "%shop_num)
        start_num = int(input("从第几家店铺开始："))

        for i in range(start_num, shop_num + 1):
            # Return to the store list, then open row i's hourly statistics.
            drive.find_element_by_xpath('//ul/li/a[text()="店铺分析"]').click()
            _random_pause(2, 3)
            drive.find_element_by_xpath('//*[@id="tag-flag"]/tr[%s]/td[12]/div/a[6]/img[@title="分时统计"]'%i).click()
            _random_pause(3, 4)
            # Select "No" for the listing-time option.
            drive.find_element_by_xpath('//*[@id="f1"]/div[1]/div/div[2]/input[2]').click()
            _random_pause(2, 3)

            # Total shown by the previous search; used to spot a stale page.
            total = ""
            shop = drive.find_element_by_xpath('/html/body/div[2]/div[2]/a[2]').text

            for date in date_xl["日期"].astype(str):
                # Query a single day: start date == end date.
                start_date = drive.find_element_by_xpath('//*[@id="start_date_fenshi"]')
                start_date.click()
                start_date.clear()
                start_date.send_keys(date)
                end_date = drive.find_element_by_xpath('//*[@id="end_date_fenshi"]')
                end_date.click()
                end_date.clear()
                end_date.send_keys(date)
                _random_pause(1, 2)
                drive.find_element_by_xpath('//*[@id="f1"]/div[1]/div/div[6]/input[@value="检索"]').click()
                _random_pause(4, 5)

                # Read the grand-total amount from the last row of the table.
                lk = drive.find_element_by_tag_name("tbody").find_elements_by_tag_name("tr")
                lk_num = len(lk)
                total_sale = drive.find_element_by_xpath('//*[@id="content"]/div[2]/form/table/tbody/tr[%s]/td[6]/center'%lk_num).text

                # An unchanged total suggests the new result has not loaded
                # yet, so wait once more before downloading.
                if total_sale == total:
                    print("可能未加载完成，请稍等！")
                    _random_pause(4, 5)
                drive.find_element_by_xpath('//*[@id="f1"]/p/input[@value="CSV下载"]').click()
                total = total_sale
                _random_pause(4, 5)

                # Best effort: a failed move is reported and the loop goes on.
                try:
                    f = os.listdir(tmp_path)[0]
                    # Chrome keeps a "crdownload" name until the download ends.
                    while "crdownload" in f:
                        _random_pause(4, 5)
                        f = os.listdir(tmp_path)[0]
                    old_file = os.path.join(tmp_path, f)
                    new_file = os.path.join(data_path, date+shop+".xls")
                    # shutil.move falls back to copy-and-delete when a plain
                    # rename is not possible, e.g. tmp_path and data_path
                    # are on different drives, where os.rename would fail.
                    shutil.move(old_file, new_file)
                    print("-----%s下载已完成-----"%(date+shop))
                except Exception as e:
                    print(e)
            print("---%s下载完成，即将开始下载下一个店铺------"%shop)
    except Exception as e:
        print("出现问题",e)
    finally:
        # Always shut the browser down so Chrome and chromedriver processes
        # are not left running, whether the run finished or failed.
        if drive is not None:
            drive.quit()
# Login credentials: placeholders to fill in before running. Without these
# assignments the call below raised NameError.
username=""
password=""
tmp_path=""  # temporary download directory
data_path=""  # directory where the renamed data files are stored
# Spreadsheet of dates to fetch; bitalk_log reads its "日期" column.
date_xl=pd.read_excel("需要爬取得时间序列.xlsx")
bitalk_log(username,password,date_xl,tmp_path,data_path)

  
 
  1
  2
  3
  4
  5
  6
  7
  8
  9
  10
  11
  12
  13
  14
  15
  16
  17
  18
  19
  20
  21
  22
  23
  24
  25
  26
  27
  28
  29
  30
  31
  32
  33
  34
  35
  36
  37
  38
  39
  40
  41
  42
  43
  44
  45
  46
  47
  48
  49
  50
  51
  52
  53
  54
  55
  56
  57
  58
  59
  60
  61
  62
  63
  64
  65
  66
  67
  68
  69
  70
  71
  72
  73
  74
  75
  76
  77
  78
  79
  80
  81
  82
  83
  84
  85
  86
  87
  88
  89
  90
  91
  92
  93
  94
  95
  96
  97
  98
  99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109

文章来源: blog.csdn.net，作者：诡途，版权归原作者所有，如需转载，请联系作者。

原文链接：blog.csdn.net/qq_35866846/article/details/103298524

点赞
收藏
关注作者

0/1000

抱歉，系统识别当前为高风险访问，暂不支持该操作

全部回复

上滑加载中

设置昵称

在此一键设置昵称，即可参与社区互动！

*长度不超过10个汉字或20个英文字符，设置后3个月内不可修改。

确认取消

加入云驻计划，成为创作者

华为云周边好礼
免费体验产品
特殊身份标识
线下官方门票
内部专家零距离
与10000+优质创作者共同成长

立即加入

Selenium自动下载qbt数据

全部回复

设置昵称

关于作者

目录

加入云驻计划，成为创作者

Selenium自动下载qbt数据

全部回复

设置昵称

关于作者

目录

加入云驻计划，成为创作者

推荐阅读

相关产品