翻页输出东方财富利润表,有时候可以输出三份dataframe,有时候就两份甚至直接element is not attached to the page document,加了显式等待感觉好像没啥用,(另外是不是if中套test会好一点)求帮助谢谢
from selenium import webdriver
import pandas as pd
import numpy as np
from selenium.webdriver.common.by import By
from lxml import etree
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
stock = 'SZ300278'
url='http://f10.eastmoney.com/f10_v2/FinanceAnalysis.aspx?code=%s' %stock
browser = webdriver.Chrome()
browser.get(url)
wait=WebDriverWait(browser, 10)
wait.until(EC.element_to_be_clickable((By.XPATH,'//div[@class="main"]/descendant::ul[@id="lrb_ul"]/li[2]')))
button=browser.find_element(By.XPATH,'//div[@class="main"]/descendant::ul[@id="lrb_ul"]/li[2]')
button.click()
wait.until(EC.element_to_be_clickable((By.ID,'lrb_next')))
next=browser.find_element_by_id('lrb_next')
content_wait=wait.until(EC.presence_of_element_located((By.ID,'report_lrb')))
element=browser.find_element_by_id('report_lrb')
th_content = element.find_elements_by_tag_name('th')
td_content = element.find_elements_by_tag_name('td')
list=[]
for th in th_content:
list.append(th.text)
for td in td_content:
list.append(td.text)
col = len(element.find_elements_by_css_selector('tr:nth-child(1) th'))
list = [list[i:i + col] for i in range(0, len(list), col)]
for x in range(list.count(['', '', '', '', '', ''])):
list.remove(['', '', '', '', '', ''])
lrb_table = pd.DataFrame(list)
print(lrb_table)
while next.get_attribute('style')=='display: inline;':
try:
wait.until(EC.element_to_be_clickable((By.ID,'lrb_next')))
time.sleep(1)
next.click()
next_wait=wait.until(EC.element_to_be_clickable((By.ID,'lrb_next')))
content_wait=wait.until(EC.presence_of_element_located((By.ID,'report_lrb')))
content_wait=wait.until(EC.presence_of_element_located((By.ID,'report_lrb')))
element=browser.find_element_by_id('report_lrb')
th_content = element.find_elements_by_tag_name('th')
td_content = element.find_elements_by_tag_name('td')
list=[]
for th in th_content:
list.append(th.text)
for td in td_content:
list.append(td.text)
col = len(element.find_elements_by_css_selector('tr:nth-child(1) th'))
list = [list[i:i + col] for i in range(0, len(list), col)]
for x in range(list.count(['', '', '', '', '', ''])):
list.remove(['', '', '', '', '', ''])
lrb_table1 = pd.DataFrame(list)
print(lrb_table1)
except:
print('打印完成')