@ -2,6 +2,7 @@
import csv
import csv
import time
import time
from selenium import webdriver
from selenium import webdriver
from scrapy . selector import Selector
# Upper bound on retry_time: the retry guards in get_company_finance only
# proceed while `retry_time < MAX_RETRY_TIME`.
# NOTE(review): each assignment below appears twice in this view; this looks
# like an artifact of how the diff was extracted -- confirm against the file.
MAX_RETRY_TIME = 150
MAX_RETRY_TIME = 150
# URL template for the emweb.securities.eastmoney.com pc_hsf10 page, ending in
# the `#/cwfx` fragment.
# NOTE(review): the `%s` slot after `code=` is presumably filled with a stock
# code; the formatting call is not visible in this view -- confirm.
URL = " https://emweb.securities.eastmoney.com/pc_hsf10/pages/index.html?type=web&code= %s &color=b#/cwfx "
URL = " https://emweb.securities.eastmoney.com/pc_hsf10/pages/index.html?type=web&code= %s &color=b#/cwfx "
@ -27,12 +28,26 @@ def get_company_finance(stock_code_list):
try :
try :
if retry_time < MAX_RETRY_TIME :
if retry_time < MAX_RETRY_TIME :
browser . switch_to . window ( browser . window_handles [ 1 ] )
browser . switch_to . window ( browser . window_handles [ 1 ] )
zcfzb_button = browser . find_elements_by_css_selector ( ' div.section.zcfzb ul:last-child li:nth-child(2) ' ) [ 0 ]
# 财务报表-资产负债表按钮css
lrb_button = browser . find_elements_by_css_selector ( ' div.section.lrb ul:last-child li:nth-child(2) ' ) [ 0 ]
zcfzb_button = browser . find_elements_by_css_selector ( ' div.cwbbTab ul.commonTab li:nth-child(1) ' ) [ 0 ]
# 财务报表-资产负债表的年报按钮css
zcfzb_nb_button = browser . find_elements_by_css_selector ( ' div.cwbbTab+div>div.tab ul:last-child li:nth-child(2) ' ) [ 0 ]
# 财务报表-利润表按钮css
lrb_button = browser . find_elements_by_css_selector ( ' div.cwbbTab ul.commonTab li:nth-child(2) ' ) [ 0 ]
# 财务报表-利润表的年报按钮css
lrb_nb_button = browser . find_elements_by_css_selector ( ' div.cwbbTab ~ div ~ div > div.tab ul:last-child li:nth-child(2) ' ) [ 0 ]
# 点击并保存源码
browser . execute_script ( " arguments[0].click(); " , zcfzb_button )
browser . execute_script ( " arguments[0].click(); " , zcfzb_button )
time . sleep ( 0.2 )
time . sleep ( 0.2 )
browser . execute_script ( " arguments[0].click(); " , zcfzb_nb_button )
time . sleep ( 2 )
page_source_zcfz = browser . page_source
# 点击并保存源码
browser . execute_script ( " arguments[0].click(); " , lrb_button )
browser . execute_script ( " arguments[0].click(); " , lrb_button )
time . sleep ( 0.2 )
time . sleep ( 0.2 )
browser . execute_script ( " arguments[0].click(); " , lrb_nb_button )
time . sleep ( 2 )
page_source_lrb = browser . page_source
break
break
else :
else :
print ( " 年报按钮获取失败 " )
print ( " 年报按钮获取失败 " )
@ -47,225 +62,166 @@ def get_company_finance(stock_code_list):
try :
try :
if retry_time < MAX_RETRY_TIME :
if retry_time < MAX_RETRY_TIME :
if " 银行 " in company_name or " 信托 " in company_name :
if " 银行 " in company_name or " 信托 " in company_name :
# 资产总额
# 资产总额(资产总计)
TotalAssets_2022 = \
TotalAssets_2024 = Selector ( text = page_source_zcfz ) . css ( " div.zcfzb_table tr:nth-child(43) td:nth-child(2) span::text " ) . extract_first ( )
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(43) td:nth-child(2) span ' ) [
TotalAssets_2023 = Selector ( text = page_source_zcfz ) . css ( " div.zcfzb_table tr:nth-child(43) td:nth-child(3) span::text " ) . extract_first ( )
0 ] . text
TotalAssets_2022 = Selector ( text = page_source_zcfz ) . css ( " div.zcfzb_table tr:nth-child(43) td:nth-child(4) span::text " ) . extract_first ( )
TotalAssets_2021 = \
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(43) td:nth-child(3) span ' ) [
# 销售收入(营业收入)
0 ] . text
SalesProceeds_2024 = Selector ( text = page_source_lrb ) . css ( " div.lrb_table tr:nth-child(2) td:nth-child(2) span::text " ) . extract_first ( )
TotalAssets_2020 = \
SalesProceeds_2023 = Selector ( text = page_source_lrb ) . css ( " div.lrb_table tr:nth-child(2) td:nth-child(3) span::text " ) . extract_first ( )
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(43) td:nth-child(4) span ' ) [
SalesProceeds_2022 = Selector ( text = page_source_lrb ) . css ( " div.lrb_table tr:nth-child(2) td:nth-child(4) span::text " ) . extract_first ( )
0 ] . text
# 销售收入
SalesProceeds_2022 = \
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(2) td:nth-child(2) span ' ) [ 0 ] . text
SalesProceeds_2021 = \
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(2) td:nth-child(3) span ' ) [ 0 ] . text
SalesProceeds_2020 = \
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(2) td:nth-child(4) span ' ) [ 0 ] . text
# 利润总额
# 利润总额
LRZE_2022 = \
LRZE_2024 = Selector ( text = page_source_lrb ) . css ( " div.lrb_table tr:nth-child(35) td:nth-child(2) span::text " ) . extract_first ( )
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(35) td:nth-child(2) span ' ) [ 0 ] . text
LRZE_2023 = Selector ( text = page_source_lrb ) . css ( " div.lrb_table tr:nth-child(35) td:nth-child(3) span::text " ) . extract_first ( )
LRZE_2021 = \
LRZE_2022 = Selector ( text = page_source_lrb ) . css ( " div.lrb_table tr:nth-child(35) td:nth-child(4) span::text " ) . extract_first ( )
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(35) td:nth-child(3) span ' ) [ 0 ] . text
LRZE_2020 = \
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(35) td:nth-child(4) span ' ) [ 0 ] . text
# 净利润
# 净利润
JLR_2022 = \
JLR_2024 = Selector ( text = page_source_lrb ) . css ( " div.lrb_table tr:nth-child(39) td:nth-child(2) span::text " ) . extract_first ( )
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(39) td:nth-child(2) span ' ) [ 0 ] . text
JLR_2023 = Selector ( text = page_source_lrb ) . css ( " div.lrb_table tr:nth-child(39) td:nth-child(3) span::text " ) . extract_first ( )
JLR_2021 = \
JLR_2022 = Selector ( text = page_source_lrb ) . css ( " div.lrb_table tr:nth-child(39) td:nth-child(4) span::text " ) . extract_first ( )
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(39) td:nth-child(3) span ' ) [ 0 ] . text
# 负债总计(负债合计)
JLR_2020 = \
FZZJ_2024 = Selector ( text = page_source_zcfz ) . css ( " div.zcfzb_table tr:nth-child(74) td:nth-child(2) span::text " ) . extract_first ( )
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(39) td:nth-child(4) span ' ) [ 0 ] . text
FZZJ_2023 = Selector ( text = page_source_zcfz ) . css ( " div.zcfzb_table tr:nth-child(74) td:nth-child(3) span::text " ) . extract_first ( )
# 负债总计
FZZJ_2022 = Selector ( text = page_source_zcfz ) . css ( " div.zcfzb_table tr:nth-child(74) td:nth-child(4) span::text " ) . extract_first ( )
FZZJ_2022 = \
# 纳税额(应交税费)
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(74) td:nth-child(2) span ' ) [
Ratal_2024 = Selector ( text = page_source_zcfz ) . css ( " div.zcfzb_table tr:nth-child(59) td:nth-child(2) span::text " ) . extract_first ( )
0 ] . text
Ratal_2023 = Selector ( text = page_source_zcfz ) . css ( " div.zcfzb_table tr:nth-child(59) td:nth-child(3) span::text " ) . extract_first ( )
FZZJ_2021 = \
Ratal_2022 = Selector ( text = page_source_zcfz ) . css ( " div.zcfzb_table tr:nth-child(59) td:nth-child(4) span::text " ) . extract_first ( )
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(74) td:nth-child(3) span ' ) [
0 ] . text
FZZJ_2020 = \
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(74) td:nth-child(4) span ' ) [
0 ] . text
# 纳税额
Ratal_2022 = \
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(59) td:nth-child(2) span ' ) [
0 ] . text
Ratal_2021 = \
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(59) td:nth-child(3) span ' ) [
0 ] . text
Ratal_2020 = \
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(59) td:nth-child(4) span ' ) [
0 ] . text
break
break
elif " 金融 " in company_name or " 证券 " in company_name or " 期货 " in company_name :
elif " 证券 " in company_name or " 期货 " in company_name :
# 资产总额
# 资产总额(资产总计)
TotalAssets_2022 = \
TotalAssets_2024 = Selector ( text = page_source_zcfz ) . css (
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(44) td:nth-child(2) span ' ) [
" div.zcfzb_table tr:nth-child(44) td:nth-child(2) span::text " ) . extract_first ( )
0 ] . text
TotalAssets_2023 = Selector ( text = page_source_zcfz ) . css (
TotalAssets_2021 = \
" div.zcfzb_table tr:nth-child(44) td:nth-child(3) span::text " ) . extract_first ( )
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(44) td:nth-child(3) span ' ) [
TotalAssets_2022 = Selector ( text = page_source_zcfz ) . css (
0 ] . text
" div.zcfzb_table tr:nth-child(44) td:nth-child(4) span::text " ) . extract_first ( )
TotalAssets_2020 = \
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(44) td:nth-child(4) span ' ) [
# 销售收入(营业收入)
0 ] . text
SalesProceeds_2024 = Selector ( text = page_source_lrb ) . css (
# 销售收入
" div.lrb_table tr:nth-child(2) td:nth-child(2) span::text " ) . extract_first ( )
SalesProceeds_2022 = \
SalesProceeds_2023 = Selector ( text = page_source_lrb ) . css (
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(2) td:nth-child(2) span ' ) [ 0 ] . text
" div.lrb_table tr:nth-child(2) td:nth-child(3) span::text " ) . extract_first ( )
SalesProceeds_2021 = \
SalesProceeds_2022 = Selector ( text = page_source_lrb ) . css (
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(2) td:nth-child(3) span ' ) [ 0 ] . text
" div.lrb_table tr:nth-child(2) td:nth-child(4) span::text " ) . extract_first ( )
SalesProceeds_2020 = \
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(2) td:nth-child(4) span ' ) [ 0 ] . text
# 利润总额
# 利润总额
LRZE_2022 = \
LRZE_2024 = Selector ( text = page_source_lrb ) . css (
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(40) td:nth-child(2) span ' ) [ 0 ] . text
" div.lrb_table tr:nth-child(40) td:nth-child(2) span::text " ) . extract_first ( )
LRZE_2021 = \
LRZE_2023 = Selector ( text = page_source_lrb ) . css (
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(40) td:nth-child(3) span ' ) [ 0 ] . text
" div.lrb_table tr:nth-child(40) td:nth-child(3) span::text " ) . extract_first ( )
LRZE_2020 = \
LRZE_2022 = Selector ( text = page_source_lrb ) . css (
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(40) td:nth-child(4) span ' ) [ 0 ] . text
" div.lrb_table tr:nth-child(40) td:nth-child(4) span::text " ) . extract_first ( )
# 净利润
# 净利润
JLR_2022 = \
JLR_2024 = Selector ( text = page_source_lrb ) . css (
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(42) td:nth-child(2) span ' ) [ 0 ] . text
" div.lrb_table tr:nth-child(42) td:nth-child(2) span::text " ) . extract_first ( )
JLR_2021 = \
JLR_2023 = Selector ( text = page_source_lrb ) . css (
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(42) td:nth-child(3) span ' ) [ 0 ] . text
" div.lrb_table tr:nth-child(42) td:nth-child(3) span::text " ) . extract_first ( )
JLR_2020 = \
JLR_2022 = Selector ( text = page_source_lrb ) . css (
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(42) td:nth-child(4) span ' ) [ 0 ] . text
" div.lrb_table tr:nth-child(42) td:nth-child(4) span::text " ) . extract_first ( )
# 负债总计
# 负债总计(负债合计)
FZZJ_2022 = \
FZZJ_2024 = Selector ( text = page_source_zcfz ) . css (
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(76) td:nth-child(2) span ' ) [
" div.zcfzb_table tr:nth-child(76) td:nth-child(2) span::text " ) . extract_first ( )
0 ] . text
FZZJ_2023 = Selector ( text = page_source_zcfz ) . css (
FZZJ_2021 = \
" div.zcfzb_table tr:nth-child(76) td:nth-child(3) span::text " ) . extract_first ( )
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(76) td:nth-child(3) span ' ) [
FZZJ_2022 = Selector ( text = page_source_zcfz ) . css (
0 ] . text
" div.zcfzb_table tr:nth-child(76) td:nth-child(4) span::text " ) . extract_first ( )
FZZJ_2020 = \
# 纳税额(应交税费)
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(76) td:nth-child(4) span ' ) [
Ratal_2024 = Selector ( text = page_source_zcfz ) . css (
0 ] . text
" div.zcfzb_table tr:nth-child(61) td:nth-child(2) span::text " ) . extract_first ( )
# 纳税额
Ratal_2023 = Selector ( text = page_source_zcfz ) . css (
Ratal_2022 = \
" div.zcfzb_table tr:nth-child(61) td:nth-child(3) span::text " ) . extract_first ( )
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(61) td:nth-child(2) span ' ) [
Ratal_2022 = Selector ( text = page_source_zcfz ) . css (
0 ] . text
" div.zcfzb_table tr:nth-child(61) td:nth-child(4) span::text " ) . extract_first ( )
Ratal_2021 = \
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(61) td:nth-child(3) span ' ) [
0 ] . text
Ratal_2020 = \
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(61) td:nth-child(4) span ' ) [
0 ] . text
break
break
elif " 保险 " in company_name :
elif " 保险 " in company_name :
# 资产总额
# 资产总额(资产总计)
TotalAssets_2022 = \
TotalAssets_2024 = Selector ( text = page_source_zcfz ) . css (
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(51) td:nth-child(2) span ' ) [
" div.zcfzb_table tr:nth-child(51) td:nth-child(2) span::text " ) . extract_first ( )
0 ] . text
TotalAssets_2023 = Selector ( text = page_source_zcfz ) . css (
TotalAssets_2021 = \
" div.zcfzb_table tr:nth-child(51) td:nth-child(3) span::text " ) . extract_first ( )
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(51) td:nth-child(3) span ' ) [
TotalAssets_2022 = Selector ( text = page_source_zcfz ) . css (
0 ] . text
" div.zcfzb_table tr:nth-child(51) td:nth-child(4) span::text " ) . extract_first ( )
TotalAssets_2020 = \
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(51) td:nth-child(4) span ' ) [
# 销售收入(营业收入)
0 ] . text
SalesProceeds_2024 = Selector ( text = page_source_lrb ) . css (
# 销售收入
" div.lrb_table tr:nth-child(2) td:nth-child(2) span::text " ) . extract_first ( )
SalesProceeds_2022 = \
SalesProceeds_2023 = Selector ( text = page_source_lrb ) . css (
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(2) td:nth-child(2) span ' ) [ 0 ] . text
" div.lrb_table tr:nth-child(2) td:nth-child(3) span::text " ) . extract_first ( )
SalesProceeds_2021 = \
SalesProceeds_2022 = Selector ( text = page_source_lrb ) . css (
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(2) td:nth-child(3) span ' ) [ 0 ] . text
" div.lrb_table tr:nth-child(2) td:nth-child(4) span::text " ) . extract_first ( )
SalesProceeds_2020 = \
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(2) td:nth-child(4) span ' ) [ 0 ] . text
# 利润总额
# 利润总额
LRZE_2022 = \
LRZE_2024 = Selector ( text = page_source_lrb ) . css (
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(53) td:nth-child(2) span ' ) [ 0 ] . text
" div.lrb_table tr:nth-child(53) td:nth-child(2) span::text " ) . extract_first ( )
LRZE_2021 = \
LRZE_2023 = Selector ( text = page_source_lrb ) . css (
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(53) td:nth-child(3) span ' ) [ 0 ] . text
" div.lrb_table tr:nth-child(53) td:nth-child(3) span::text " ) . extract_first ( )
LRZE_2020 = \
LRZE_2022 = Selector ( text = page_source_lrb ) . css (
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(53) td:nth-child(4) span ' ) [ 0 ] . text
" div.lrb_table tr:nth-child(53) td:nth-child(4) span::text " ) . extract_first ( )
# 净利润
# 净利润
JLR_2022 = \
JLR_2024 = Selector ( text = page_source_lrb ) . css (
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(57) td:nth-child(2) span ' ) [ 0 ] . text
" div.lrb_table tr:nth-child(57) td:nth-child(2) span::text " ) . extract_first ( )
JLR_2021 = \
JLR_2023 = Selector ( text = page_source_lrb ) . css (
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(57) td:nth-child(3) span ' ) [ 0 ] . text
" div.lrb_table tr:nth-child(57) td:nth-child(3) span::text " ) . extract_first ( )
JLR_2020 = \
JLR_2022 = Selector ( text = page_source_lrb ) . css (
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(57) td:nth-child(4) span ' ) [ 0 ] . text
" div.lrb_table tr:nth-child(57) td:nth-child(4) span::text " ) . extract_first ( )
# 负债总计
# 负债总计(负债合计)
FZZJ_2022 = \
FZZJ_2024 = Selector ( text = page_source_zcfz ) . css (
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(98) td:nth-child(2) span ' ) [
" div.zcfzb_table tr:nth-child(98) td:nth-child(2) span::text " ) . extract_first ( )
0 ] . text
FZZJ_2023 = Selector ( text = page_source_zcfz ) . css (
FZZJ_2021 = \
" div.zcfzb_table tr:nth-child(98) td:nth-child(3) span::text " ) . extract_first ( )
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(98) td:nth-child(3) span ' ) [
FZZJ_2022 = Selector ( text = page_source_zcfz ) . css (
0 ] . text
" div.zcfzb_table tr:nth-child(98) td:nth-child(4) span::text " ) . extract_first ( )
FZZJ_2020 = \
# 纳税额(应交税费)
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(98) td:nth-child(4) span ' ) [
Ratal_2024 = Selector ( text = page_source_zcfz ) . css (
0 ] . text
" div.zcfzb_table tr:nth-child(72) td:nth-child(2) span::text " ) . extract_first ( )
# 纳税额
Ratal_2023 = Selector ( text = page_source_zcfz ) . css (
Ratal_2022 = \
" div.zcfzb_table tr:nth-child(72) td:nth-child(3) span::text " ) . extract_first ( )
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(72) td:nth-child(2) span ' ) [
Ratal_2022 = Selector ( text = page_source_zcfz ) . css (
0 ] . text
" div.zcfzb_table tr:nth-child(72) td:nth-child(4) span::text " ) . extract_first ( )
Ratal_2021 = \
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(72) td:nth-child(3) span ' ) [
0 ] . text
Ratal_2020 = \
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(72) td:nth-child(4) span ' ) [
0 ] . text
break
break
else :
else :
# 资产总额
# 资产总额(资产总计)
TotalAssets_2022 = \
TotalAssets_2024 = Selector ( text = page_source_zcfz ) . css (
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(69) td:nth-child(2) span ' ) [
" div.zcfzb_table tr:nth-child(69) td:nth-child(2) span::text " ) . extract_first ( )
0 ] . text
TotalAssets_2023 = Selector ( text = page_source_zcfz ) . css (
TotalAssets_2021 = \
" div.zcfzb_table tr:nth-child(69) td:nth-child(3) span::text " ) . extract_first ( )
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(69) td:nth-child(3) span ' ) [
TotalAssets_2022 = Selector ( text = page_source_zcfz ) . css (
0 ] . text
" div.zcfzb_table tr:nth-child(69) td:nth-child(4) span::text " ) . extract_first ( )
TotalAssets_2020 = \
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(69) td:nth-child(4) span ' ) [
# 销售收入(营业收入)
0 ] . text
SalesProceeds_2024 = Selector ( text = page_source_lrb ) . css (
# 销售收入
" div.lrb_table tr:nth-child(2) td:nth-child(2) span::text " ) . extract_first ( )
SalesProceeds_2022 = \
SalesProceeds_2023 = Selector ( text = page_source_lrb ) . css (
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(2) td:nth-child(2) span ' ) [
" div.lrb_table tr:nth-child(2) td:nth-child(3) span::text " ) . extract_first ( )
0 ] . text
SalesProceeds_2022 = Selector ( text = page_source_lrb ) . css (
SalesProceeds_2021 = \
" div.lrb_table tr:nth-child(2) td:nth-child(4) span::text " ) . extract_first ( )
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(2) td:nth-child(3) span ' ) [
0 ] . text
SalesProceeds_2020 = \
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(2) td:nth-child(4) span ' ) [
0 ] . text
# 利润总额
# 利润总额
LRZE_2022 = \
LRZE_2024 = Selector ( text = page_source_lrb ) . css (
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(48) td:nth-child(2) span ' ) [
" div.lrb_table tr:nth-child(48) td:nth-child(2) span::text " ) . extract_first ( )
0 ] . text
LRZE_2023 = Selector ( text = page_source_lrb ) . css (
LRZE_2021 = \
" div.lrb_table tr:nth-child(48) td:nth-child(3) span::text " ) . extract_first ( )
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(48) td:nth-child(3) span ' ) [
LRZE_2022 = Selector ( text = page_source_lrb ) . css (
0 ] . text
" div.lrb_table tr:nth-child(48) td:nth-child(4) span::text " ) . extract_first ( )
LRZE_2020 = \
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(48) td:nth-child(4) span ' ) [
0 ] . text
# 净利润
# 净利润
JLR_2022 = \
JLR_2024 = Selector ( text = page_source_lrb ) . css (
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(52) td:nth-child(2) span ' ) [
" div.lrb_table tr:nth-child(52) td:nth-child(2) span::text " ) . extract_first ( )
0 ] . text
JLR_2023 = Selector ( text = page_source_lrb ) . css (
JLR_2021 = \
" div.lrb_table tr:nth-child(52) td:nth-child(3) span::text " ) . extract_first ( )
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(52) td:nth-child(3) span ' ) [
JLR_2022 = Selector ( text = page_source_lrb ) . css (
0 ] . text
" div.lrb_table tr:nth-child(52) td:nth-child(4) span::text " ) . extract_first ( )
JLR_2020 = \
# 负债总计(负债合计)
browser . find_elements_by_css_selector ( ' div.lrb_table tr:nth-child(52) td:nth-child(4) span ' ) [
FZZJ_2024 = Selector ( text = page_source_zcfz ) . css (
0 ] . text
" div.zcfzb_table tr:nth-child(126) td:nth-child(2) span::text " ) . extract_first ( )
# 负债总计
FZZJ_2023 = Selector ( text = page_source_zcfz ) . css (
FZZJ_2022 = \
" div.zcfzb_table tr:nth-child(126) td:nth-child(3) span::text " ) . extract_first ( )
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(126) td:nth-child(2) span ' ) [
FZZJ_2022 = Selector ( text = page_source_zcfz ) . css (
0 ] . text
" div.zcfzb_table tr:nth-child(126) td:nth-child(4) span::text " ) . extract_first ( )
FZZJ_2021 = \
# 纳税额(应交税费)
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(126) td:nth-child(3) span ' ) [
Ratal_2024 = Selector ( text = page_source_zcfz ) . css (
0 ] . text
" div.zcfzb_table tr:nth-child(88) td:nth-child(2) span::text " ) . extract_first ( )
FZZJ_2020 = \
Ratal_2023 = Selector ( text = page_source_zcfz ) . css (
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(126) td:nth-child(4) span ' ) [
" div.zcfzb_table tr:nth-child(88) td:nth-child(3) span::text " ) . extract_first ( )
0 ] . text
Ratal_2022 = Selector ( text = page_source_zcfz ) . css (
# 纳税额
" div.zcfzb_table tr:nth-child(88) td:nth-child(4) span::text " ) . extract_first ( )
Ratal_2022 = \
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(88) td:nth-child(2) span ' ) [
0 ] . text
Ratal_2021 = \
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(88) td:nth-child(3) span ' ) [
0 ] . text
Ratal_2020 = \
browser . find_elements_by_css_selector ( ' div.zcfzb_table tr:nth-child(88) td:nth-child(4) span ' ) [
0 ] . text
break
break
else :
else :
print ( " 数据获取失败 " )
print ( " 数据获取失败 " )
@ -284,21 +240,21 @@ def get_company_finance(stock_code_list):
if flag != 1 :
if flag != 1 :
company_finance_details = { " 股票代码 " : code , " 股票名称 " : name , " 公司名称 " : company_name ,
company_finance_details = { " 股票代码 " : code , " 股票名称 " : name , " 公司名称 " : company_name ,
" 资产总额2022 " : TotalAssets_2022 , " 资产总额2021 " : TotalAssets_2021 , " 资产总额2020 " : TotalAssets_2020 ,
" 资产总额2024 " : TotalAssets_2024 , " 资产总额2023 " : TotalAssets_2023 , " 资产总额2022 " : TotalAssets_2022 ,
" 销售收入2022 " : SalesProceeds_2022 , " 销售收入2021 " : SalesProceeds_2021 , " 销售收入2020 " : SalesProceeds_2020 ,
" 销售收入2024 " : SalesProceeds_2024 , " 销售收入2023 " : SalesProceeds_2023 , " 销售收入2022 " : SalesProceeds_2022 ,
" 利润总额2022 " : LRZE_2022 , " 利润总额2021 " : LRZE_2021 , " 利润总额2020 " : LRZE_2020 ,
" 利润总额2024 " : LRZE_2024 , " 利润总额2023 " : LRZE_2023 , " 利润总额2022 " : LRZE_2022 ,
" 净利润2022 " : JLR_2022 , " 净利润2021 " : JLR_2021 , " 净利润2020 " : JLR_2020 ,
" 净利润2024 " : JLR_2024 , " 净利润2023 " : JLR_2023 , " 净利润2022 " : JLR_2022 ,
" 负债总计2022 " : FZZJ_2022 , " 负债总计2021 " : FZZJ_2021 , " 负债总计2020 " : FZZJ_2020 ,
" 负债总计2024 " : FZZJ_2024 , " 负债总计2023 " : FZZJ_2023 , " 负债总计2022 " : FZZJ_2022 ,
" 纳税额2022 " : Ratal_2022 , " 纳税额2021 " : Ratal_2021 , " 纳税额2020 " : Ratal_2020
" 纳税额2024 " : Ratal_2024 , " 纳税额2023 " : Ratal_2023 , " 纳税额2022 " : Ratal_2022
}
}
print ( company_finance_details )
print ( company_finance_details )
company_finance_details_csv = [ code , name , company_name ,
company_finance_details_csv = [ code , name , company_name ,
TotalAssets_2022 , TotalAssets_2021 , TotalAssets_2020 ,
TotalAssets_2024 , TotalAssets_2023 , TotalAssets_2022 ,
SalesProceeds_2022 , SalesProceeds_2021 , SalesProceeds_2020 ,
SalesProceeds_2024 , SalesProceeds_2023 , SalesProceeds_2022 ,
LRZE_2022 , LRZE_2021 , LRZE_2020 ,
LRZE_2024 , LRZE_2023 , LRZE_2022 ,
JLR_2022 , JLR_2021 , JLR_2020 ,
JLR_2024 , JLR_2023 , JLR_2022 ,
FZZJ_2022 , FZZJ_2021 , FZZJ_2020 ,
FZZJ_2024 , FZZJ_2023 , FZZJ_2022 ,
Ratal_2022 , Ratal_2021 , Ratal_2020 ]
Ratal_2024 , Ratal_2023 , Ratal_2022 ]
with open ( ' company_finance_details.csv ' , ' a ' , newline = ' ' ) as newfile :
with open ( ' company_finance_details.csv ' , ' a ' , newline = ' ' ) as newfile :
writer = csv . writer ( newfile )
writer = csv . writer ( newfile )
writer . writerow ( company_finance_details_csv )
writer . writerow ( company_finance_details_csv )
@ -317,5 +273,4 @@ with open(file,'r') as file:
stock_code_list = [ ]
stock_code_list = [ ]
for code in code_list :
for code in code_list :
stock_code_list . append ( code )
stock_code_list . append ( code )
del stock_code_list [ 0 ]
get_company_finance ( stock_code_list )
get_company_finance ( stock_code_list )