You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
321 lines
19 KiB
321 lines
19 KiB
|
7 months ago
|
# coding=utf-8
|
||
|
|
import csv
|
||
|
|
import time
|
||
|
|
from selenium import webdriver
|
||
|
|
|
||
|
|
# Upper bound on polling attempts while waiting for the page's report tabs to
# become available; the polling loops pause 0.2 s between attempts.
MAX_RETRY_TIME = 150

# Eastmoney F10 page URL template. The single %s placeholder is filled with
# the exchange-prefixed stock code (e.g. "SH600000"). The "#/cwfx" fragment
# presumably selects the financial-analysis view -- verify against the site.
URL = "https://emweb.securities.eastmoney.com/pc_hsf10/pages/index.html?type=web&code=%s&color=b#/cwfx"
|
||
|
|
|
||
|
|
# Figures scraped for every company, in output order: (output label, table id).
# The table id is interpolated into the CSS class "div.<id>_table"; "zcfzb" and
# "lrb" are presumably the balance sheet and the income statement -- verify.
_FIGURES = (
    ("资产总额", "zcfzb"),  # total assets
    ("销售收入", "lrb"),    # sales revenue
    ("利润总额", "lrb"),    # total profit
    ("净利润", "lrb"),      # net profit
    ("负债总计", "zcfzb"),  # total liabilities
    ("纳税额", "zcfzb"),    # tax amount
)

# (year label used in the output, td:nth-child column index in the table).
# NOTE(review): the year labels are fixed; the code assumes columns 2-4 of the
# page hold exactly these years -- confirm against the live page.
_YEAR_COLUMNS = (("2022", 2), ("2021", 3), ("2020", 4))

# Row indices (tr:nth-child) of the six figures above, selected by the first
# keyword group found in the company name. Order of the groups matters.
_ROWS_BY_KEYWORD = (
    (("银行", "信托"), (43, 2, 35, 39, 74, 59)),          # bank / trust
    (("金融", "证券", "期货"), (44, 2, 40, 42, 76, 61)),  # finance / securities / futures
    (("保险",), (51, 2, 53, 57, 98, 72)),                 # insurance
)
_DEFAULT_ROWS = (69, 2, 48, 52, 126, 88)

# A data read is abandoned when it fails while this many failures have already
# been counted, i.e. after 27 attempts in total.
_DATA_RETRY_LIMIT = 25

# Pause between polling attempts and after each tab click, in seconds.
_POLL_INTERVAL = 0.2


def _exchange_prefixed(code):
    """Return *code* prefixed with the exchange tag chosen by its first digit."""
    if code.startswith('6'):
        return 'SH' + code
    if code.startswith(('8', '4')):
        return 'BJ' + code
    return 'SZ' + code


def _statement_rows(company_name):
    """Return the six table row indices that apply to *company_name*."""
    for keywords, rows in _ROWS_BY_KEYWORD:
        if any(keyword in company_name for keyword in keywords):
            return rows
    return _DEFAULT_ROWS


def _open_annual_tabs(browser):
    """Switch to the newly opened window and click the second tab of both tables.

    Polls up to MAX_RETRY_TIME times because the window and its elements may
    not exist yet right after ``window.open``. Returns True once both clicks
    were issued, False when every attempt failed.
    """
    from selenium.webdriver.common.by import By

    for _ in range(MAX_RETRY_TIME):
        try:
            browser.switch_to.window(browser.window_handles[1])
            zcfzb_button = browser.find_elements(
                By.CSS_SELECTOR,
                'div.section.zcfzb ul:last-child li:nth-child(2)')[0]
            lrb_button = browser.find_elements(
                By.CSS_SELECTOR,
                'div.section.lrb ul:last-child li:nth-child(2)')[0]
            browser.execute_script("arguments[0].click();", zcfzb_button)
            time.sleep(_POLL_INTERVAL)
            browser.execute_script("arguments[0].click();", lrb_button)
            time.sleep(_POLL_INTERVAL)
            return True
        except Exception:
            # Page not ready yet (missing window/element, stale reference...).
            # Exception rather than a bare except keeps Ctrl-C working.
            time.sleep(_POLL_INTERVAL)
    return False


def _read_figures(browser, rows):
    """Read the 18 cell texts (6 figures x 3 year columns) in output order.

    Raises IndexError when a cell is not present in the page yet.
    """
    from selenium.webdriver.common.by import By

    figures = []
    for (_, table), row in zip(_FIGURES, rows):
        for _, column in _YEAR_COLUMNS:
            selector = 'div.%s_table tr:nth-child(%d) td:nth-child(%d) span' % (
                table, row, column)
            figures.append(browser.find_elements(By.CSS_SELECTOR, selector)[0].text)
    return figures


def _read_figures_with_retry(browser, rows):
    """Poll `_read_figures` until it succeeds; return None after too many failures."""
    failures = 0
    while True:
        try:
            return _read_figures(browser, rows)
        except Exception:
            if failures > _DATA_RETRY_LIMIT:
                return None
            failures += 1
            time.sleep(_POLL_INTERVAL)


def get_company_finance(stock_code_list):
    """Scrape three years of key financial figures for each listed company.

    For every row a new browser window is opened on the company's page, the
    second tab of both statement tables is clicked, and six figures for three
    year columns are read. Successful rows are printed and appended to
    ``company_finance_details.csv``; rows whose figures cannot be read are
    appended to ``company_error.csv``.

    Args:
        stock_code_list: iterable of rows where ``row[0]`` is the stock code,
            ``row[1]`` the stock name and ``row[2]`` the company name. Rows
            with fewer than three fields or an empty code are skipped.

    Returns:
        None. Processing stops early (after printing a message) when the
        report tabs of a page never become available; remaining rows are not
        processed in that case.
    """
    browser = webdriver.Firefox()
    try:
        for row in stock_code_list:
            if len(row) < 3 or not row[0]:
                # Blank or truncated CSV rows carry no stock to look up.
                print("跳过无效行: %r" % (row,))
                continue
            code, name, company_name = row[0], row[1], row[2]

            url = URL % _exchange_prefixed(code)
            browser.execute_script('window.open("%s")' % url)

            if not _open_annual_tabs(browser):
                print("年报按钮获取失败")
                return

            figures = _read_figures_with_retry(browser, _statement_rows(company_name))
            if figures is None:
                # Remember the company so it can be retried or checked by hand.
                with open('company_error.csv', 'a', newline='') as error_file:
                    csv.writer(error_file).writerow([code, name, company_name])
            else:
                company_finance_details = {
                    "股票代码": code,
                    "股票名称": name,
                    "公司名称": company_name,
                }
                labels = [label + year
                          for label, _ in _FIGURES
                          for year, _ in _YEAR_COLUMNS]
                company_finance_details.update(zip(labels, figures))
                print(company_finance_details)

                with open('company_finance_details.csv', 'a', newline='') as details_file:
                    csv.writer(details_file).writerow(
                        [code, name, company_name] + figures)

            # Close the per-company window and return to the initial one.
            browser.close()
            browser.switch_to.window(browser.window_handles[0])
    finally:
        # Always end the session, including on the early-return path, so no
        # browser or driver process is left behind.
        browser.quit()
|
||
|
|
|
||
|
|
# Load the company list and scrape every entry. Each CSV row is expected to
# hold the stock code, stock name and company name in its first three columns.
company_csv_path = 'company.csv'
with open(company_csv_path, 'r', newline='') as company_csv:
    # The first row is always discarded (presumably a header row); slicing
    # instead of ``del rows[0]`` keeps an empty file from raising IndexError.
    stock_code_list = list(csv.reader(company_csv, delimiter=','))[1:]

# The file is fully read at this point, so it is not held open while scraping.
get_company_finance(stock_code_list)
|