Browse Source

1.完善帮助文档

2.修复一些bug
3.生成文件模板
4.ccw_company_finance_details新增字段Year.
master
lijunhui 7 months ago
parent
commit
518ffa3b5c
  1. 2
      README.txt
  2. 5410
      company.csv
  3. 5410
      company_finance_details.csv
  4. 397
      eastmoney.py
  5. 2
      eastmoney_getcompany.py
  6. 77
      import_finance.py
  7. 343
      stock_code.csv

2
README.txt

@ -1,4 +1,4 @@
1.先到东方财富终端导出股票代码以及股票名称,保存到stock_code中; 1.先到东方财富官网下载客户端,在客户端中点击“沪深京”,右键导出全部股票代码以及股票名称,保存到stock_code.csv中;
2.执行eastmoney_getcompany.py,获得公司名称company.csv; 2.执行eastmoney_getcompany.py,获得公司名称company.csv;
3.执行eastmoney.py,获得公司详细财务信息company_finance_details.csv,其中由于某些公司css布局会有特殊情况采集不到,会收集到company_error.csv中,需要修改css代码重新爬取或者手动采集; 3.执行eastmoney.py,获得公司详细财务信息company_finance_details.csv;部分公司的页面布局特殊,现有css选择器采集不到数据,这些公司会被记录到company_error.csv中,需要修改css选择器后重新爬取,或者手动采集;
4.整理好数据后,执行import_finance.py将数据导入数据库中。 4.整理好数据后,执行import_finance.py将数据导入数据库中。

5410
company.csv

File diff suppressed because it is too large

5410
company_finance_details.csv

File diff suppressed because it is too large

397
eastmoney.py

@ -2,6 +2,7 @@
import csv import csv
import time import time
from selenium import webdriver from selenium import webdriver
from scrapy.selector import Selector
MAX_RETRY_TIME = 150 MAX_RETRY_TIME = 150
URL = "https://emweb.securities.eastmoney.com/pc_hsf10/pages/index.html?type=web&code=%s&color=b#/cwfx" URL = "https://emweb.securities.eastmoney.com/pc_hsf10/pages/index.html?type=web&code=%s&color=b#/cwfx"
@ -27,12 +28,26 @@ def get_company_finance(stock_code_list):
try: try:
if retry_time < MAX_RETRY_TIME: if retry_time < MAX_RETRY_TIME:
browser.switch_to.window(browser.window_handles[1]) browser.switch_to.window(browser.window_handles[1])
zcfzb_button = browser.find_elements_by_css_selector('div.section.zcfzb ul:last-child li:nth-child(2)')[0] # 财务报表-资产负债表按钮css
lrb_button = browser.find_elements_by_css_selector('div.section.lrb ul:last-child li:nth-child(2)')[0] zcfzb_button = browser.find_elements_by_css_selector('div.cwbbTab ul.commonTab li:nth-child(1)')[0]
# 财务报表-资产负债表的年报按钮css
zcfzb_nb_button = browser.find_elements_by_css_selector('div.cwbbTab+div>div.tab ul:last-child li:nth-child(2)')[0]
# 财务报表-利润表按钮css
lrb_button = browser.find_elements_by_css_selector('div.cwbbTab ul.commonTab li:nth-child(2)')[0]
# 财务报表-利润表的年报按钮css
lrb_nb_button = browser.find_elements_by_css_selector('div.cwbbTab ~ div ~ div > div.tab ul:last-child li:nth-child(2)')[0]
# 点击并保存源码
browser.execute_script("arguments[0].click();", zcfzb_button) browser.execute_script("arguments[0].click();", zcfzb_button)
time.sleep(0.2) time.sleep(0.2)
browser.execute_script("arguments[0].click();", zcfzb_nb_button)
time.sleep(2)
page_source_zcfz = browser.page_source
# 点击并保存源码
browser.execute_script("arguments[0].click();", lrb_button) browser.execute_script("arguments[0].click();", lrb_button)
time.sleep(0.2) time.sleep(0.2)
browser.execute_script("arguments[0].click();", lrb_nb_button)
time.sleep(2)
page_source_lrb = browser.page_source
break break
else: else:
print("年报按钮获取失败") print("年报按钮获取失败")
@ -47,225 +62,166 @@ def get_company_finance(stock_code_list):
try: try:
if retry_time < MAX_RETRY_TIME: if retry_time < MAX_RETRY_TIME:
if "银行" in company_name or "信托" in company_name: if "银行" in company_name or "信托" in company_name:
# 资产总额 # 资产总额(资产总计)
TotalAssets_2022 = \ TotalAssets_2024 = Selector(text=page_source_zcfz).css("div.zcfzb_table tr:nth-child(43) td:nth-child(2) span::text").extract_first()
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(43) td:nth-child(2) span')[ TotalAssets_2023 = Selector(text=page_source_zcfz).css("div.zcfzb_table tr:nth-child(43) td:nth-child(3) span::text").extract_first()
0].text TotalAssets_2022 = Selector(text=page_source_zcfz).css("div.zcfzb_table tr:nth-child(43) td:nth-child(4) span::text").extract_first()
TotalAssets_2021 = \
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(43) td:nth-child(3) span')[ # 销售收入(营业收入)
0].text SalesProceeds_2024 = Selector(text=page_source_lrb).css("div.lrb_table tr:nth-child(2) td:nth-child(2) span::text").extract_first()
TotalAssets_2020 = \ SalesProceeds_2023 = Selector(text=page_source_lrb).css("div.lrb_table tr:nth-child(2) td:nth-child(3) span::text").extract_first()
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(43) td:nth-child(4) span')[ SalesProceeds_2022 = Selector(text=page_source_lrb).css("div.lrb_table tr:nth-child(2) td:nth-child(4) span::text").extract_first()
0].text
# 销售收入
SalesProceeds_2022 = \
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(2) td:nth-child(2) span')[0].text
SalesProceeds_2021 = \
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(2) td:nth-child(3) span')[0].text
SalesProceeds_2020 = \
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(2) td:nth-child(4) span')[0].text
# 利润总额 # 利润总额
LRZE_2022 = \ LRZE_2024 = Selector(text=page_source_lrb).css("div.lrb_table tr:nth-child(35) td:nth-child(2) span::text").extract_first()
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(35) td:nth-child(2) span')[0].text LRZE_2023 = Selector(text=page_source_lrb).css("div.lrb_table tr:nth-child(35) td:nth-child(3) span::text").extract_first()
LRZE_2021 = \ LRZE_2022 = Selector(text=page_source_lrb).css("div.lrb_table tr:nth-child(35) td:nth-child(4) span::text").extract_first()
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(35) td:nth-child(3) span')[0].text
LRZE_2020 = \
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(35) td:nth-child(4) span')[0].text
# 净利润 # 净利润
JLR_2022 = \ JLR_2024 = Selector(text=page_source_lrb).css("div.lrb_table tr:nth-child(39) td:nth-child(2) span::text").extract_first()
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(39) td:nth-child(2) span')[0].text JLR_2023 = Selector(text=page_source_lrb).css("div.lrb_table tr:nth-child(39) td:nth-child(3) span::text").extract_first()
JLR_2021 = \ JLR_2022 = Selector(text=page_source_lrb).css("div.lrb_table tr:nth-child(39) td:nth-child(4) span::text").extract_first()
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(39) td:nth-child(3) span')[0].text # 负债总计(负债合计)
JLR_2020 = \ FZZJ_2024 = Selector(text=page_source_zcfz).css("div.zcfzb_table tr:nth-child(74) td:nth-child(2) span::text").extract_first()
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(39) td:nth-child(4) span')[0].text FZZJ_2023 = Selector(text=page_source_zcfz).css("div.zcfzb_table tr:nth-child(74) td:nth-child(3) span::text").extract_first()
# 负债总计 FZZJ_2022 = Selector(text=page_source_zcfz).css("div.zcfzb_table tr:nth-child(74) td:nth-child(4) span::text").extract_first()
FZZJ_2022 = \ # 纳税额(应交税费)
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(74) td:nth-child(2) span')[ Ratal_2024 = Selector(text=page_source_zcfz).css("div.zcfzb_table tr:nth-child(59) td:nth-child(2) span::text").extract_first()
0].text Ratal_2023 = Selector(text=page_source_zcfz).css("div.zcfzb_table tr:nth-child(59) td:nth-child(3) span::text").extract_first()
FZZJ_2021 = \ Ratal_2022 = Selector(text=page_source_zcfz).css("div.zcfzb_table tr:nth-child(59) td:nth-child(4) span::text").extract_first()
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(74) td:nth-child(3) span')[
0].text
FZZJ_2020 = \
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(74) td:nth-child(4) span')[
0].text
# 纳税额
Ratal_2022 = \
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(59) td:nth-child(2) span')[
0].text
Ratal_2021 = \
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(59) td:nth-child(3) span')[
0].text
Ratal_2020 = \
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(59) td:nth-child(4) span')[
0].text
break break
elif "金融" in company_name or "证券" in company_name or "期货" in company_name: elif "证券" in company_name or "期货" in company_name:
# 资产总额 # 资产总额(资产总计)
TotalAssets_2022 = \ TotalAssets_2024 = Selector(text=page_source_zcfz).css(
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(44) td:nth-child(2) span')[ "div.zcfzb_table tr:nth-child(44) td:nth-child(2) span::text").extract_first()
0].text TotalAssets_2023 = Selector(text=page_source_zcfz).css(
TotalAssets_2021 = \ "div.zcfzb_table tr:nth-child(44) td:nth-child(3) span::text").extract_first()
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(44) td:nth-child(3) span')[ TotalAssets_2022 = Selector(text=page_source_zcfz).css(
0].text "div.zcfzb_table tr:nth-child(44) td:nth-child(4) span::text").extract_first()
TotalAssets_2020 = \
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(44) td:nth-child(4) span')[ # 销售收入(营业收入)
0].text SalesProceeds_2024 = Selector(text=page_source_lrb).css(
# 销售收入 "div.lrb_table tr:nth-child(2) td:nth-child(2) span::text").extract_first()
SalesProceeds_2022 = \ SalesProceeds_2023 = Selector(text=page_source_lrb).css(
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(2) td:nth-child(2) span')[0].text "div.lrb_table tr:nth-child(2) td:nth-child(3) span::text").extract_first()
SalesProceeds_2021 = \ SalesProceeds_2022 = Selector(text=page_source_lrb).css(
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(2) td:nth-child(3) span')[0].text "div.lrb_table tr:nth-child(2) td:nth-child(4) span::text").extract_first()
SalesProceeds_2020 = \
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(2) td:nth-child(4) span')[0].text
# 利润总额 # 利润总额
LRZE_2022 = \ LRZE_2024 = Selector(text=page_source_lrb).css(
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(40) td:nth-child(2) span')[0].text "div.lrb_table tr:nth-child(40) td:nth-child(2) span::text").extract_first()
LRZE_2021 = \ LRZE_2023 = Selector(text=page_source_lrb).css(
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(40) td:nth-child(3) span')[0].text "div.lrb_table tr:nth-child(40) td:nth-child(3) span::text").extract_first()
LRZE_2020 = \ LRZE_2022 = Selector(text=page_source_lrb).css(
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(40) td:nth-child(4) span')[0].text "div.lrb_table tr:nth-child(40) td:nth-child(4) span::text").extract_first()
# 净利润 # 净利润
JLR_2022 = \ JLR_2024 = Selector(text=page_source_lrb).css(
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(42) td:nth-child(2) span')[0].text "div.lrb_table tr:nth-child(42) td:nth-child(2) span::text").extract_first()
JLR_2021 = \ JLR_2023 = Selector(text=page_source_lrb).css(
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(42) td:nth-child(3) span')[0].text "div.lrb_table tr:nth-child(42) td:nth-child(3) span::text").extract_first()
JLR_2020 = \ JLR_2022 = Selector(text=page_source_lrb).css(
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(42) td:nth-child(4) span')[0].text "div.lrb_table tr:nth-child(42) td:nth-child(4) span::text").extract_first()
# 负债总计 # 负债总计(负债合计)
FZZJ_2022 = \ FZZJ_2024 = Selector(text=page_source_zcfz).css(
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(76) td:nth-child(2) span')[ "div.zcfzb_table tr:nth-child(76) td:nth-child(2) span::text").extract_first()
0].text FZZJ_2023 = Selector(text=page_source_zcfz).css(
FZZJ_2021 = \ "div.zcfzb_table tr:nth-child(76) td:nth-child(3) span::text").extract_first()
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(76) td:nth-child(3) span')[ FZZJ_2022 = Selector(text=page_source_zcfz).css(
0].text "div.zcfzb_table tr:nth-child(76) td:nth-child(4) span::text").extract_first()
FZZJ_2020 = \ # 纳税额(应交税费)
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(76) td:nth-child(4) span')[ Ratal_2024 = Selector(text=page_source_zcfz).css(
0].text "div.zcfzb_table tr:nth-child(61) td:nth-child(2) span::text").extract_first()
# 纳税额 Ratal_2023 = Selector(text=page_source_zcfz).css(
Ratal_2022 = \ "div.zcfzb_table tr:nth-child(61) td:nth-child(3) span::text").extract_first()
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(61) td:nth-child(2) span')[ Ratal_2022 = Selector(text=page_source_zcfz).css(
0].text "div.zcfzb_table tr:nth-child(61) td:nth-child(4) span::text").extract_first()
Ratal_2021 = \
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(61) td:nth-child(3) span')[
0].text
Ratal_2020 = \
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(61) td:nth-child(4) span')[
0].text
break break
elif "保险" in company_name: elif "保险" in company_name:
# 资产总额 # 资产总额(资产总计)
TotalAssets_2022 = \ TotalAssets_2024 = Selector(text=page_source_zcfz).css(
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(51) td:nth-child(2) span')[ "div.zcfzb_table tr:nth-child(51) td:nth-child(2) span::text").extract_first()
0].text TotalAssets_2023 = Selector(text=page_source_zcfz).css(
TotalAssets_2021 = \ "div.zcfzb_table tr:nth-child(51) td:nth-child(3) span::text").extract_first()
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(51) td:nth-child(3) span')[ TotalAssets_2022 = Selector(text=page_source_zcfz).css(
0].text "div.zcfzb_table tr:nth-child(51) td:nth-child(4) span::text").extract_first()
TotalAssets_2020 = \
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(51) td:nth-child(4) span')[ # 销售收入(营业收入)
0].text SalesProceeds_2024 = Selector(text=page_source_lrb).css(
# 销售收入 "div.lrb_table tr:nth-child(2) td:nth-child(2) span::text").extract_first()
SalesProceeds_2022 = \ SalesProceeds_2023 = Selector(text=page_source_lrb).css(
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(2) td:nth-child(2) span')[0].text "div.lrb_table tr:nth-child(2) td:nth-child(3) span::text").extract_first()
SalesProceeds_2021 = \ SalesProceeds_2022 = Selector(text=page_source_lrb).css(
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(2) td:nth-child(3) span')[0].text "div.lrb_table tr:nth-child(2) td:nth-child(4) span::text").extract_first()
SalesProceeds_2020 = \
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(2) td:nth-child(4) span')[0].text
# 利润总额 # 利润总额
LRZE_2022 = \ LRZE_2024 = Selector(text=page_source_lrb).css(
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(53) td:nth-child(2) span')[0].text "div.lrb_table tr:nth-child(53) td:nth-child(2) span::text").extract_first()
LRZE_2021 = \ LRZE_2023 = Selector(text=page_source_lrb).css(
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(53) td:nth-child(3) span')[0].text "div.lrb_table tr:nth-child(53) td:nth-child(3) span::text").extract_first()
LRZE_2020 = \ LRZE_2022 = Selector(text=page_source_lrb).css(
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(53) td:nth-child(4) span')[0].text "div.lrb_table tr:nth-child(53) td:nth-child(4) span::text").extract_first()
# 净利润 # 净利润
JLR_2022 = \ JLR_2024 = Selector(text=page_source_lrb).css(
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(57) td:nth-child(2) span')[0].text "div.lrb_table tr:nth-child(57) td:nth-child(2) span::text").extract_first()
JLR_2021 = \ JLR_2023 = Selector(text=page_source_lrb).css(
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(57) td:nth-child(3) span')[0].text "div.lrb_table tr:nth-child(57) td:nth-child(3) span::text").extract_first()
JLR_2020 = \ JLR_2022 = Selector(text=page_source_lrb).css(
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(57) td:nth-child(4) span')[0].text "div.lrb_table tr:nth-child(57) td:nth-child(4) span::text").extract_first()
# 负债总计 # 负债总计(负债合计)
FZZJ_2022 = \ FZZJ_2024 = Selector(text=page_source_zcfz).css(
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(98) td:nth-child(2) span')[ "div.zcfzb_table tr:nth-child(98) td:nth-child(2) span::text").extract_first()
0].text FZZJ_2023 = Selector(text=page_source_zcfz).css(
FZZJ_2021 = \ "div.zcfzb_table tr:nth-child(98) td:nth-child(3) span::text").extract_first()
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(98) td:nth-child(3) span')[ FZZJ_2022 = Selector(text=page_source_zcfz).css(
0].text "div.zcfzb_table tr:nth-child(98) td:nth-child(4) span::text").extract_first()
FZZJ_2020 = \ # 纳税额(应交税费)
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(98) td:nth-child(4) span')[ Ratal_2024 = Selector(text=page_source_zcfz).css(
0].text "div.zcfzb_table tr:nth-child(72) td:nth-child(2) span::text").extract_first()
# 纳税额 Ratal_2023 = Selector(text=page_source_zcfz).css(
Ratal_2022 = \ "div.zcfzb_table tr:nth-child(72) td:nth-child(3) span::text").extract_first()
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(72) td:nth-child(2) span')[ Ratal_2022 = Selector(text=page_source_zcfz).css(
0].text "div.zcfzb_table tr:nth-child(72) td:nth-child(4) span::text").extract_first()
Ratal_2021 = \
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(72) td:nth-child(3) span')[
0].text
Ratal_2020 = \
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(72) td:nth-child(4) span')[
0].text
break break
else: else:
# 资产总额 # 资产总额(资产总计)
TotalAssets_2022 = \ TotalAssets_2024 = Selector(text=page_source_zcfz).css(
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(69) td:nth-child(2) span')[ "div.zcfzb_table tr:nth-child(69) td:nth-child(2) span::text").extract_first()
0].text TotalAssets_2023 = Selector(text=page_source_zcfz).css(
TotalAssets_2021 = \ "div.zcfzb_table tr:nth-child(69) td:nth-child(3) span::text").extract_first()
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(69) td:nth-child(3) span')[ TotalAssets_2022 = Selector(text=page_source_zcfz).css(
0].text "div.zcfzb_table tr:nth-child(69) td:nth-child(4) span::text").extract_first()
TotalAssets_2020 = \
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(69) td:nth-child(4) span')[ # 销售收入(营业收入)
0].text SalesProceeds_2024 = Selector(text=page_source_lrb).css(
# 销售收入 "div.lrb_table tr:nth-child(2) td:nth-child(2) span::text").extract_first()
SalesProceeds_2022 = \ SalesProceeds_2023 = Selector(text=page_source_lrb).css(
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(2) td:nth-child(2) span')[ "div.lrb_table tr:nth-child(2) td:nth-child(3) span::text").extract_first()
0].text SalesProceeds_2022 = Selector(text=page_source_lrb).css(
SalesProceeds_2021 = \ "div.lrb_table tr:nth-child(2) td:nth-child(4) span::text").extract_first()
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(2) td:nth-child(3) span')[
0].text
SalesProceeds_2020 = \
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(2) td:nth-child(4) span')[
0].text
# 利润总额 # 利润总额
LRZE_2022 = \ LRZE_2024 = Selector(text=page_source_lrb).css(
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(48) td:nth-child(2) span')[ "div.lrb_table tr:nth-child(48) td:nth-child(2) span::text").extract_first()
0].text LRZE_2023 = Selector(text=page_source_lrb).css(
LRZE_2021 = \ "div.lrb_table tr:nth-child(48) td:nth-child(3) span::text").extract_first()
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(48) td:nth-child(3) span')[ LRZE_2022 = Selector(text=page_source_lrb).css(
0].text "div.lrb_table tr:nth-child(48) td:nth-child(4) span::text").extract_first()
LRZE_2020 = \
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(48) td:nth-child(4) span')[
0].text
# 净利润 # 净利润
JLR_2022 = \ JLR_2024 = Selector(text=page_source_lrb).css(
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(52) td:nth-child(2) span')[ "div.lrb_table tr:nth-child(52) td:nth-child(2) span::text").extract_first()
0].text JLR_2023 = Selector(text=page_source_lrb).css(
JLR_2021 = \ "div.lrb_table tr:nth-child(52) td:nth-child(3) span::text").extract_first()
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(52) td:nth-child(3) span')[ JLR_2022 = Selector(text=page_source_lrb).css(
0].text "div.lrb_table tr:nth-child(52) td:nth-child(4) span::text").extract_first()
JLR_2020 = \ # 负债总计(负债合计)
browser.find_elements_by_css_selector('div.lrb_table tr:nth-child(52) td:nth-child(4) span')[ FZZJ_2024 = Selector(text=page_source_zcfz).css(
0].text "div.zcfzb_table tr:nth-child(126) td:nth-child(2) span::text").extract_first()
# 负债总计 FZZJ_2023 = Selector(text=page_source_zcfz).css(
FZZJ_2022 = \ "div.zcfzb_table tr:nth-child(126) td:nth-child(3) span::text").extract_first()
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(126) td:nth-child(2) span')[ FZZJ_2022 = Selector(text=page_source_zcfz).css(
0].text "div.zcfzb_table tr:nth-child(126) td:nth-child(4) span::text").extract_first()
FZZJ_2021 = \ # 纳税额(应交税费)
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(126) td:nth-child(3) span')[ Ratal_2024 = Selector(text=page_source_zcfz).css(
0].text "div.zcfzb_table tr:nth-child(88) td:nth-child(2) span::text").extract_first()
FZZJ_2020 = \ Ratal_2023 = Selector(text=page_source_zcfz).css(
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(126) td:nth-child(4) span')[ "div.zcfzb_table tr:nth-child(88) td:nth-child(3) span::text").extract_first()
0].text Ratal_2022 = Selector(text=page_source_zcfz).css(
# 纳税额 "div.zcfzb_table tr:nth-child(88) td:nth-child(4) span::text").extract_first()
Ratal_2022 = \
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(88) td:nth-child(2) span')[
0].text
Ratal_2021 = \
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(88) td:nth-child(3) span')[
0].text
Ratal_2020 = \
browser.find_elements_by_css_selector('div.zcfzb_table tr:nth-child(88) td:nth-child(4) span')[
0].text
break break
else: else:
print("数据获取失败") print("数据获取失败")
@ -284,21 +240,21 @@ def get_company_finance(stock_code_list):
if flag != 1: if flag != 1:
company_finance_details = {"股票代码":code,"股票名称":name,"公司名称":company_name, company_finance_details = {"股票代码":code,"股票名称":name,"公司名称":company_name,
"资产总额2022":TotalAssets_2022,"资产总额2021":TotalAssets_2021,"资产总额2020":TotalAssets_2020, "资产总额2024":TotalAssets_2024,"资产总额2023":TotalAssets_2023,"资产总额2022":TotalAssets_2022,
"销售收入2022":SalesProceeds_2022,"销售收入2021":SalesProceeds_2021,"销售收入2020":SalesProceeds_2020, "销售收入2024":SalesProceeds_2024,"销售收入2023":SalesProceeds_2023,"销售收入2022":SalesProceeds_2022,
"利润总额2022":LRZE_2022,"利润总额2021":LRZE_2021,"利润总额2020":LRZE_2020, "利润总额2024":LRZE_2024,"利润总额2023":LRZE_2023,"利润总额2022":LRZE_2022,
"净利润2022":JLR_2022,"净利润2021":JLR_2021,"净利润2020":JLR_2020, "净利润2024":JLR_2024,"净利润2023":JLR_2023,"净利润2022":JLR_2022,
"负债总计2022":FZZJ_2022,"负债总计2021":FZZJ_2021,"负债总计2020":FZZJ_2020, "负债总计2024":FZZJ_2024,"负债总计2023":FZZJ_2023,"负债总计2022":FZZJ_2022,
"纳税额2022":Ratal_2022,"纳税额2021":Ratal_2021,"纳税额2020":Ratal_2020 "纳税额2024":Ratal_2024,"纳税额2023":Ratal_2023,"纳税额2022":Ratal_2022
} }
print(company_finance_details) print(company_finance_details)
company_finance_details_csv = [code,name,company_name, company_finance_details_csv = [code,name,company_name,
TotalAssets_2022,TotalAssets_2021,TotalAssets_2020, TotalAssets_2024,TotalAssets_2023,TotalAssets_2022,
SalesProceeds_2022,SalesProceeds_2021,SalesProceeds_2020, SalesProceeds_2024,SalesProceeds_2023,SalesProceeds_2022,
LRZE_2022,LRZE_2021,LRZE_2020, LRZE_2024,LRZE_2023,LRZE_2022,
JLR_2022,JLR_2021,JLR_2020, JLR_2024,JLR_2023,JLR_2022,
FZZJ_2022,FZZJ_2021,FZZJ_2020, FZZJ_2024,FZZJ_2023,FZZJ_2022,
Ratal_2022,Ratal_2021,Ratal_2020] Ratal_2024,Ratal_2023,Ratal_2022]
with open('company_finance_details.csv', 'a', newline='') as newfile: with open('company_finance_details.csv', 'a', newline='') as newfile:
writer = csv.writer(newfile) writer = csv.writer(newfile)
writer.writerow(company_finance_details_csv) writer.writerow(company_finance_details_csv)
@ -317,5 +273,4 @@ with open(file,'r') as file:
stock_code_list = [] stock_code_list = []
for code in code_list: for code in code_list:
stock_code_list.append(code) stock_code_list.append(code)
del stock_code_list[0]
get_company_finance(stock_code_list) get_company_finance(stock_code_list)

2
eastmoney_getcompany.py

@ -37,7 +37,7 @@ def get_company(stock_code_list):
company_details = {"股票代码":code,"股票名称":name,"公司名称":company_name} company_details = {"股票代码":code,"股票名称":name,"公司名称":company_name}
print(company_details) print(company_details)
company_details_csv = [code,name,company_name] company_details_csv = [code,name,company_name]
with open('code_2.csv', 'a', newline='') as newfile: with open('company.csv', 'a', newline='') as newfile:
writer = csv.writer(newfile) writer = csv.writer(newfile)
writer.writerow(company_details_csv) writer.writerow(company_details_csv)
newfile.close() newfile.close()

77
import_finance.py

@ -3,11 +3,11 @@ import csv
import pymysql import pymysql
# 本地 # 本地
# db_host = '127.0.0.1' db_host = '127.0.0.1'
# db_port = 3306 db_port = 3306
# db_user = 'root' db_user = 'root'
# db_password = '123456' db_password = '123456'
# db_name = 'crawler' db_name = 'chace'
# 测试服 # 测试服
# db_host = '47.112.242.103' # db_host = '47.112.242.103'
@ -17,11 +17,11 @@ import pymysql
# db_name = 'chace' # db_name = 'chace'
# 正式服 # 正式服
db_host = '119.23.173.194' # db_host = '119.23.173.194'
db_port = 17600 # db_port = 17600
db_user = 'ccwfinances' # db_user = 'ccwfinances'
db_password = 'CCWfinacial@231215#' # db_password = 'CCWfinacial@231215#'
db_name = 'chace' # db_name = 'chace'
def turn_num(s): def turn_num(s):
@ -40,44 +40,43 @@ with open(file,'r') as file:
company_finance_details_list = [] company_finance_details_list = []
for company_finance_details_csv in company_finance_details_list_csv: for company_finance_details_csv in company_finance_details_list_csv:
company_finance_details_list.append(company_finance_details_csv) company_finance_details_list.append(company_finance_details_csv)
del company_finance_details_list[0]
for company_finance_details in company_finance_details_list: for company_finance_details in company_finance_details_list:
stock_code = company_finance_details[0] stock_code = company_finance_details[0].zfill(6)
stock_name = company_finance_details[1] stock_name = company_finance_details[1]
company_name = company_finance_details[2] company_name = company_finance_details[2]
del company_finance_details[0:3] del company_finance_details[0:3]
TotalAssets_2022 = turn_num(company_finance_details[0]) TotalAssets_2024 = turn_num(company_finance_details[0])
TotalAssets_2021 = turn_num(company_finance_details[1]) TotalAssets_2023 = turn_num(company_finance_details[1])
TotalAssets_2020 = turn_num(company_finance_details[2]) TotalAssets_2022 = turn_num(company_finance_details[2])
SalesProceeds_2022 = turn_num(company_finance_details[3]) SalesProceeds_2024 = turn_num(company_finance_details[3])
SalesProceeds_2021 = turn_num(company_finance_details[4]) SalesProceeds_2023 = turn_num(company_finance_details[4])
SalesProceeds_2020 = turn_num(company_finance_details[5]) SalesProceeds_2022 = turn_num(company_finance_details[5])
LRZE_2022 = turn_num(company_finance_details[6]) LRZE_2024 = turn_num(company_finance_details[6])
LRZE_2021 = turn_num(company_finance_details[7]) LRZE_2023 = turn_num(company_finance_details[7])
LRZE_2020 = turn_num(company_finance_details[8]) LRZE_2022 = turn_num(company_finance_details[8])
JLR_2022 = turn_num(company_finance_details[9]) JLR_2024 = turn_num(company_finance_details[9])
JLR_2021 = turn_num(company_finance_details[10]) JLR_2023 = turn_num(company_finance_details[10])
JLR_2020 = turn_num(company_finance_details[11]) JLR_2022 = turn_num(company_finance_details[11])
FZZJ_2022 = turn_num(company_finance_details[12]) FZZJ_2024 = turn_num(company_finance_details[12])
FZZJ_2021 = turn_num(company_finance_details[13]) FZZJ_2023 = turn_num(company_finance_details[13])
FZZJ_2020 = turn_num(company_finance_details[14]) FZZJ_2022 = turn_num(company_finance_details[14])
JZC_2024 = TotalAssets_2024 - FZZJ_2024
JZC_2023 = TotalAssets_2023 - FZZJ_2023
JZC_2022 = TotalAssets_2022 - FZZJ_2022 JZC_2022 = TotalAssets_2022 - FZZJ_2022
JZC_2021 = TotalAssets_2021 - FZZJ_2021 Ratal_2024 = turn_num(company_finance_details[15])
JZC_2020 = TotalAssets_2020 - FZZJ_2020 Ratal_2023 = turn_num(company_finance_details[16])
Ratal_2022 = turn_num(company_finance_details[15]) Ratal_2022 = turn_num(company_finance_details[17])
Ratal_2021 = turn_num(company_finance_details[16])
Ratal_2020 = turn_num(company_finance_details[17])
db = pymysql.connect(user=db_user, passwd=db_password, db=db_name, host=db_host, port=int(db_port), db = pymysql.connect(user=db_user, passwd=db_password, db=db_name, host=db_host, port=int(db_port),
charset="utf8mb4", use_unicode=True) charset="utf8mb4", use_unicode=True)
cursor = db.cursor(pymysql.cursors.DictCursor) cursor = db.cursor(pymysql.cursors.DictCursor)
sql = "INSERT INTO ccw_company_finance_details(stock_code,stock_name,company_name,TotalAssets_2022,TotalAssets_2021,TotalAssets_2020,SalesProceeds_2022,SalesProceeds_2021,SalesProceeds_2020,LRZE_2022,LRZE_2021,LRZE_2020,JLR_2022,JLR_2021,JLR_2020,JZC_2022,JZC_2021,JZC_2020,Ratal_2022,Ratal_2021,Ratal_2020,is_delisted) VALUES('%s','%s','%s',%f,%f,%f,%f,%f,%f,%f,%f,%f,%f,%f,%f,%f,%f,%f,%f,%f,%f,0)" % ( sql = "INSERT INTO ccw_company_finance_details(stock_code,stock_name,company_name,TotalAssets_1,TotalAssets_2,TotalAssets_3,SalesProceeds_1,SalesProceeds_2,SalesProceeds_3,LRZE_1,LRZE_2,LRZE_3,JLR_1,JLR_2,JLR_3,JZC_1,JZC_2,JZC_3,Ratal_1,Ratal_2,Ratal_3,is_delisted,Year) VALUES('%s','%s','%s',%f,%f,%f,%f,%f,%f,%f,%f,%f,%f,%f,%f,%f,%f,%f,%f,%f,%f,0,'2025')" % (
stock_code,stock_name,company_name, stock_code,stock_name,company_name,
TotalAssets_2022,TotalAssets_2021,TotalAssets_2020, TotalAssets_2024,TotalAssets_2023,TotalAssets_2022,
SalesProceeds_2022,SalesProceeds_2021,SalesProceeds_2020, SalesProceeds_2024,SalesProceeds_2023,SalesProceeds_2022,
LRZE_2022,LRZE_2021,LRZE_2020, LRZE_2024,LRZE_2023,LRZE_2022,
JLR_2022,JLR_2021,JLR_2020, JLR_2024,JLR_2023,JLR_2022,
JZC_2022,JZC_2021,JZC_2020, JZC_2024,JZC_2023,JZC_2022,
Ratal_2022,Ratal_2021,Ratal_2020 Ratal_2024,Ratal_2023,Ratal_2022
) )
print(sql) print(sql)
cursor.execute(sql) cursor.execute(sql)

343
stock_code.csv

File diff suppressed because it is too large
Loading…
Cancel
Save