commit
6133dcb014
7 changed files with 6174 additions and 0 deletions
@ -0,0 +1,8 @@ |
|||||
|
# 默认忽略的文件 |
||||
|
/shelf/ |
||||
|
/workspace.xml |
||||
|
# 基于编辑器的 HTTP 客户端请求 |
||||
|
/httpRequests/ |
||||
|
# Datasource local storage ignored files |
||||
|
/dataSources/ |
||||
|
/dataSources.local.xml |
||||
@ -0,0 +1,4 @@ |
|||||
|
<?xml version="1.0" encoding="UTF-8"?> |
||||
|
<project version="4"> |
||||
|
<component name="VcsDirectoryMappings" defaultProject="true" /> |
||||
|
</project> |
||||
@ -0,0 +1,4 @@ |
|||||
|
1.先到东方财富终端导出股票代码以及股票名称,保存为stock_code.csv(首行为表头,脚本会跳过第一行); |
||||
|
2.执行eastmoney_getcompany.py,获得公司名称,结果追加写入code_2.csv;整理后(首行需为表头)另存为company.csv,供下一步读取; |
||||
|
3.执行eastmoney.py,获得公司详细财务信息company_finance_details.csv,其中由于某些公司css布局会有特殊情况采集不到,会收集到company_error.csv中,需要修改css代码重新爬取或者手动采集; |
||||
|
4.整理好数据后,执行import_finance.py将数据导入数据库中。 |
||||
@ -0,0 +1,321 @@ |
|||||
|
# coding=utf-8 |
||||
|
import csv |
||||
|
import time |
||||
|
from selenium import webdriver |
||||
|
|
||||
|
MAX_RETRY_TIME = 150 |
||||
|
URL = "https://emweb.securities.eastmoney.com/pc_hsf10/pages/index.html?type=web&code=%s&color=b#/cwfx" |
||||
|
|
||||
|
# Metrics scraped for every company, in output-column order. Each entry is
# (label used in the printed summary, table the row is read from). The table
# names follow the page's CSS classes ("zcfzb_table" / "lrb_table").
_METRICS = (
    ("资产总额", "zcfzb"),  # total assets
    ("销售收入", "lrb"),    # sales revenue
    ("利润总额", "lrb"),    # total profit
    ("净利润", "lrb"),      # net profit
    ("负债总计", "zcfzb"),  # total liabilities
    ("纳税额", "zcfzb"),    # tax amount
)

# Report year -> table column (td:nth-child) holding that year's figure.
_YEAR_COLUMNS = (("2022", 2), ("2021", 3), ("2020", 4))

# Row (tr:nth-child) of each metric, in _METRICS order, chosen by keywords in
# the company name. The first layout whose keyword matches wins.
_ROW_LAYOUTS = (
    (("银行", "信托"), (43, 2, 35, 39, 74, 59)),
    (("金融", "证券", "期货"), (44, 2, 40, 42, 76, 61)),
    (("保险",), (51, 2, 53, 57, 98, 72)),
)
_DEFAULT_ROWS = (69, 2, 48, 52, 126, 88)

# Number of retries (after the first attempt) before a company is written to
# company_error.csv.
_DATA_RETRIES = 26
_RETRY_DELAY = 0.2


def _with_exchange_prefix(code):
    """Return the stock code prefixed with SH / BJ / SZ based on its first digit."""
    if code[0] == '6':
        return 'SH' + code
    if code[0] == '8' or code[0] == '4':
        return 'BJ' + code
    return 'SZ' + code


def _rows_for(company_name):
    """Return the metric row indices matching the company name's keywords."""
    for keywords, rows in _ROW_LAYOUTS:
        if any(keyword in company_name for keyword in keywords):
            return rows
    return _DEFAULT_ROWS


def _first_element(browser, selector):
    """Return the first element matching a CSS selector (IndexError if none)."""
    # find_elements(by, value) exists in Selenium 3 and 4; the older
    # find_elements_by_css_selector helper was removed in Selenium 4.3.
    return browser.find_elements('css selector', selector)[0]


def _select_annual_reports(browser):
    """Switch to the newly opened tab and click the second tab of both tables.

    Returns True once both clicks succeeded, False after MAX_RETRY_TIME
    failed attempts.
    """
    for _ in range(MAX_RETRY_TIME):
        try:
            browser.switch_to.window(browser.window_handles[1])
            zcfzb_button = _first_element(
                browser, 'div.section.zcfzb ul:last-child li:nth-child(2)')
            lrb_button = _first_element(
                browser, 'div.section.lrb ul:last-child li:nth-child(2)')
            browser.execute_script("arguments[0].click();", zcfzb_button)
            time.sleep(_RETRY_DELAY)
            browser.execute_script("arguments[0].click();", lrb_button)
            time.sleep(_RETRY_DELAY)
            return True
        except Exception:
            # The tab or its widgets may not be ready yet; wait and retry.
            time.sleep(_RETRY_DELAY)
    return False


def _read_metrics(browser, company_name):
    """Read the 18 figures (6 metrics x 3 years) in output-column order."""
    values = []
    for (_, table), row in zip(_METRICS, _rows_for(company_name)):
        for _, column in _YEAR_COLUMNS:
            selector = 'div.%s_table tr:nth-child(%d) td:nth-child(%d) span' % (
                table, row, column)
            values.append(_first_element(browser, selector).text)
    return values


def _read_metrics_with_retry(browser, company_name):
    """Return the scraped figures, or None when every attempt failed."""
    for attempt in range(_DATA_RETRIES + 1):
        try:
            return _read_metrics(browser, company_name)
        except Exception:
            if attempt < _DATA_RETRIES:
                time.sleep(_RETRY_DELAY)
    return None


def _append_csv_row(path, row):
    """Append one row to a CSV file."""
    with open(path, 'a', newline='') as out:
        csv.writer(out).writerow(row)


def get_company_finance(stock_code_list):
    """Scrape three years of financial figures for every listed company.

    For each row ``[code, name, company_name, ...]`` the company's page is
    opened in a new browser tab, the second tab of both report sections is
    selected, and the figures are appended to ``company_finance_details.csv``.
    Companies whose figures cannot be read after the retries are appended to
    ``company_error.csv`` instead.

    If the report tabs cannot be selected within MAX_RETRY_TIME attempts the
    whole run stops (as before) after printing a message.

    Args:
        stock_code_list: iterable of rows; indices 0, 1 and 2 are the stock
            code, the stock name and the company name.

    Returns:
        None.
    """
    browser = webdriver.Firefox()
    try:
        for entry in stock_code_list:
            code = entry[0]
            name = entry[1]
            company_name = entry[2]

            url = URL % _with_exchange_prefix(code)
            browser.execute_script('window.open("%s")' % url)

            if not _select_annual_reports(browser):
                print("年报按钮获取失败")
                return

            values = _read_metrics_with_retry(browser, company_name)
            if values is None:
                _append_csv_row('company_error.csv', [code, name, company_name])
            else:
                company_finance_details = {
                    "股票代码": code, "股票名称": name, "公司名称": company_name}
                labels = [label + year
                          for label, _ in _METRICS
                          for year, _ in _YEAR_COLUMNS]
                company_finance_details.update(zip(labels, values))
                print(company_finance_details)
                _append_csv_row('company_finance_details.csv',
                                [code, name, company_name] + values)

            # Close the company's tab and return to the initial window.
            browser.close()
            browser.switch_to.window(browser.window_handles[0])
    finally:
        # Always end the browser session, including on early return or error.
        browser.quit()
||||
|
|
||||
|
# Load the company list and scrape the financial details of every entry.
with open('company.csv', 'r') as company_file:
    stock_code_list = list(csv.reader(company_file, delimiter=','))

# The first row is dropped before scraping (it is treated as a header).
del stock_code_list[0]
get_company_finance(stock_code_list)
||||
@ -0,0 +1,60 @@ |
|||||
|
import csv |
||||
|
import time |
||||
|
from selenium import webdriver |
||||
|
|
||||
|
MAX_RETRY_TIME = 150 |
||||
|
URL = "https://emweb.securities.eastmoney.com/pc_hsf10/pages/index.html?type=web&code=%s&color=b#/gsgk" |
||||
|
|
||||
|
def get_company(stock_code_list):
    """Look up the company name of every stock and append it to code_2.csv.

    For each row ``[code, name, ...]`` the company's page is opened in a new
    browser tab and the first cell of the first row of ``div.jbzl_table`` is
    read as the company name. One ``[code, name, company_name]`` row is
    appended to ``code_2.csv`` per stock.

    NOTE(review): the next script reads ``company.csv``, not ``code_2.csv`` —
    confirm whether the output is meant to be renamed by hand.

    Args:
        stock_code_list: iterable of rows; index 0 is the stock code and
            index 1 the stock name.

    Returns:
        None on success, "获取失败" when a company name could not be read
        within MAX_RETRY_TIME attempts, "出错啦" on any other error.
    """
    browser = None
    try:
        browser = webdriver.Firefox()
        for entry in stock_code_list:
            code = entry[0]
            name = entry[1]

            # Prefix the code with its exchange, chosen by the first digit.
            if code[0] == '6':
                prefixed = 'SH' + code
            elif code[0] == '8' or code[0] == '4':
                prefixed = 'BJ' + code
            else:
                prefixed = 'SZ' + code

            url = URL % prefixed
            browser.execute_script('window.open("%s")' % url)

            for _ in range(MAX_RETRY_TIME):
                try:
                    browser.switch_to.window(browser.window_handles[1])
                    # find_elements(by, value) exists in Selenium 3 and 4; the
                    # find_elements_by_css_selector helper was removed in 4.3.
                    company_name = browser.find_elements(
                        'css selector', 'div.jbzl_table tr:nth-child(1) td')[0].text
                    break
                except Exception:
                    # The tab or the table may not be ready yet; wait and retry.
                    time.sleep(0.2)
            else:
                return "获取失败"

            company_details = {"股票代码": code, "股票名称": name, "公司名称": company_name}
            print(company_details)
            with open('code_2.csv', 'a', newline='') as newfile:
                csv.writer(newfile).writerow([code, name, company_name])

            # Close the company's tab and return to the initial window.
            browser.close()
            browser.switch_to.window(browser.window_handles[0])
    except Exception as e:
        print(e)
        return "出错啦"
    finally:
        # Always end the browser session, including on early return or error.
        if browser is not None:
            try:
                browser.quit()
            except Exception as e:
                print(e)
||||
|
|
||||
|
# Load the exported stock list and look up the company name of every entry.
with open('stock_code.csv', 'r') as stock_file:
    stock_code_list = [row for row in csv.reader(stock_file, delimiter=',')]

# The first row is dropped before the lookup (it is treated as a header).
del stock_code_list[0]
get_company(stock_code_list)
||||
@ -0,0 +1,85 @@ |
|||||
|
# coding=utf-8 |
||||
|
import csv |
||||
|
import pymysql |
||||
|
|
||||
|
import os

# Database connection settings.
#
# The defaults below are the production server. Every value can be overridden
# through the environment (FINANCE_DB_HOST, FINANCE_DB_PORT, FINANCE_DB_USER,
# FINANCE_DB_PASSWORD, FINANCE_DB_NAME), so the script can be pointed at
# another server without editing this file. Previously used alternatives:
#   local: 127.0.0.1:3306, database "crawler"
#   test:  47.112.242.103:17601, database "chace"
#
# NOTE(review): the production password is still kept as a default so existing
# runs keep working; it should be rotated and removed from source control.
db_host = os.environ.get('FINANCE_DB_HOST', '119.23.173.194')
db_port = int(os.environ.get('FINANCE_DB_PORT', '17600'))
db_user = os.environ.get('FINANCE_DB_USER', 'ccwfinances')
db_password = os.environ.get('FINANCE_DB_PASSWORD', 'CCWfinacial@231215#')
db_name = os.environ.get('FINANCE_DB_NAME', 'chace')
||||
|
|
||||
|
|
||||
|
def turn_num(s):
    """Convert a figure with a Chinese magnitude marker into units of 万.

    '万亿' multiplies the number by 100000000, '亿' by 10000, and '万' leaves
    it unchanged. A string containing none of these markers yields 0.
    """
    # Check the longest marker first: '万亿' also contains '亿' and '万'.
    for marker, factor in (('万亿', 100000000), ('亿', 10000)):
        if marker in s:
            return float(s.replace(marker, '')) * factor
    if '万' in s:
        return float(s.replace('万', ''))
    return 0
||||
|
|
||||
|
# Import the scraped figures into the ccw_company_finance_details table.
#
# Expected CSV layout per row: stock code, stock name, company name, then
# three yearly values (2022, 2021, 2020) for each of: total assets, sales
# revenue, total profit, net profit, total liabilities, tax amount.
with open('company_finance_details.csv', 'r') as csv_file:
    company_finance_details_list = list(csv.reader(csv_file, delimiter=','))

# The first row is skipped (treated as a header); slicing tolerates an empty file.
company_finance_details_list = company_finance_details_list[1:]

insert_columns = (
    "stock_code", "stock_name", "company_name",
    "TotalAssets_2022", "TotalAssets_2021", "TotalAssets_2020",
    "SalesProceeds_2022", "SalesProceeds_2021", "SalesProceeds_2020",
    "LRZE_2022", "LRZE_2021", "LRZE_2020",
    "JLR_2022", "JLR_2021", "JLR_2020",
    "JZC_2022", "JZC_2021", "JZC_2020",
    "Ratal_2022", "Ratal_2021", "Ratal_2020",
)
# Parameterized statement: the driver escapes every value, so names that
# contain quotes no longer break (or alter) the query, and numeric values are
# no longer truncated to six decimals by "%f".
sql = ("INSERT INTO ccw_company_finance_details("
       + ",".join(insert_columns) + ",is_delisted) VALUES("
       + ",".join(["%s"] * len(insert_columns)) + ",0)")

db = pymysql.connect(user=db_user, passwd=db_password, db=db_name, host=db_host, port=int(db_port),
                     charset="utf8mb4", use_unicode=True)
try:
    cursor = db.cursor(pymysql.cursors.DictCursor)
    for company_finance_details in company_finance_details_list:
        if not company_finance_details:
            # csv.reader yields an empty list for a blank line.
            continue
        if len(company_finance_details) < 21:
            raise ValueError(
                "expected 21 columns, got %d: %r"
                % (len(company_finance_details), company_finance_details))

        stock_code, stock_name, company_name = company_finance_details[0:3]
        figures = [turn_num(cell) for cell in company_finance_details[3:21]]
        total_assets = figures[0:3]
        sales_proceeds = figures[3:6]
        lrze = figures[6:9]
        jlr = figures[9:12]
        fzzj = figures[12:15]
        ratal = figures[15:18]
        # JZC is stored instead of total liabilities: total assets - liabilities.
        jzc = [assets - liabilities for assets, liabilities in zip(total_assets, fzzj)]

        params = ([stock_code, stock_name, company_name]
                  + total_assets + sales_proceeds + lrze + jlr + jzc + ratal)
        print(cursor.mogrify(sql, params))
        cursor.execute(sql, params)
        # Commit per row, so rows inserted before a failure are kept.
        db.commit()
finally:
    db.close()
||||
File diff suppressed because it is too large
Loading…
Reference in new issue