import pandas as pd
import openpyxl
import re
# Pull the (link, title) pairs out of column C of the workbook and save them
# as a CSV file.
xlsx = '证券行业.xlsx'
df = pd.read_excel(xlsx)  # read the spreadsheet content into a DataFrame
exf = openpyxl.load_workbook(xlsx)
sheet = exf.active  # the workbook's active worksheet
C2 = sheet['C2']
C = sheet['C']
links = [c.value for c in C]
links_1 = links[1:-1]  # skip the first and the last cell of the column
# Empty cells are read as None and would make ''.join raise TypeError,
# so they are skipped; every other value is joined as text.
links_2 = ''.join(str(value) for value in links_1 if value is not None)
# Two consecutive double-quoted strings separated by a comma: the first is
# stored as the link, the second as the file name.
p = re.compile('"(.*?)","(.*?)"')
list_of_tuple = p.findall(links_2)
df2 = pd.DataFrame({
    'link': [t[0] for t in list_of_tuple],
    'f_name': [t[1] for t in list_of_tuple],
})
print(df2)
# The row index is written too; it shows up as "Unnamed: 0" when the CSV is
# read back.
df2.to_csv('证券行业.csv')
link f_name 0 http://news.windin.com/ns/bulletin.php?code=2B... 方正证券:2020年年度报告 1 http://news.windin.com/ns/bulletin.php?code=D9... 方正证券:2020年年度报告摘要 2 http://news.windin.com/ns/bulletin.php?code=8D... 长江证券:2020年年度报告 3 http://news.windin.com/ns/bulletin.php?code=8C... 长江证券:2020年年度报告摘要 4 http://news.windin.com/ns/bulletin.php?code=B6... 西南证券:2020年年度报告 .. ... ... 495 http://news.windin.com/ns/bulletin.php?code=85... 国金证券:2014年年报摘要 496 http://news.windin.com/ns/bulletin.php?code=1f... 兴业证券:2014年年报 497 http://news.windin.com/ns/bulletin.php?code=63... 兴业证券:2014年年报摘要 498 http://news.windin.com/ns/bulletin.php?code=f5... 华泰证券:2014年年报 499 http://news.windin.com/ns/bulletin.php?code=68... 华泰证券:2014年年报摘要 [500 rows x 2 columns]
import re
import pandas as pd
import os
# Load the link table and rewrite abbreviated report titles to the full
# "年年度报告" wording.
df = pd.read_csv("证券行业.csv", engine="python", encoding="utf-8")
# Raw string: "\d" inside a normal literal is an invalid escape sequence.
# The look-behind only applies to the first alternative: "年报" must follow
# four digits, while "年年报" is replaced wherever it occurs.
p = re.compile(r"(?<=\d{4})(年报)|(年年报)")
f_names = [p.sub("年年度报告", f) for f in df.f_name]
df["f_name"] = f_names
del p, f_names
print(df)
Unnamed: 0 link \ 0 0 http://news.windin.com/ns/bulletin.php?code=2B... 1 1 http://news.windin.com/ns/bulletin.php?code=D9... 2 2 http://news.windin.com/ns/bulletin.php?code=8D... 3 3 http://news.windin.com/ns/bulletin.php?code=8C... 4 4 http://news.windin.com/ns/bulletin.php?code=B6... .. ... ... 495 495 http://news.windin.com/ns/bulletin.php?code=85... 496 496 http://news.windin.com/ns/bulletin.php?code=1f... 497 497 http://news.windin.com/ns/bulletin.php?code=63... 498 498 http://news.windin.com/ns/bulletin.php?code=f5... 499 499 http://news.windin.com/ns/bulletin.php?code=68... f_name 0 方正证券:2020年年度报告 1 方正证券:2020年年度报告摘要 2 长江证券:2020年年度报告 3 长江证券:2020年年度报告摘要 4 西南证券:2020年年度报告 .. ... 495 国金证券:2014年年度报告摘要 496 兴业证券:2014年年度报告 497 兴业证券:2014年年度报告摘要 498 华泰证券:2014年年度报告 499 华泰证券:2014年年度报告摘要 [500 rows x 3 columns]
def filter_links(words, df, include=True):
    """Select rows of *df* by keywords found in the ``f_name`` column.

    Args:
        words: Keywords to look for (plain substring test).
        df: DataFrame with an ``f_name`` column of strings.
        include: If True, keep rows whose name contains at least one of
            the keywords. If False, keep rows whose name contains none of
            them.

    Returns:
        A new DataFrame with the selected rows; the original index labels
        and all columns are preserved. With an empty *words* list,
        ``include=True`` selects nothing and ``include=False`` selects
        every row.
    """
    words = list(words)
    if include:
        keep = [any(word in name for word in words) for name in df.f_name]
    else:
        keep = [all(word not in name for word in words) for name in df.f_name]
    # Select by position: indexing with an empty boolean list (``df[[]]``)
    # would be read as "select no columns" and drop every column when *df*
    # has no rows.
    positions = [pos for pos, flag in enumerate(keep) if flag]
    return df.iloc[positions]
# Drop everything that is not a full annual report (summaries, inquiry
# letters, CSR / audit / financial / risk / bond documents).
df_all = filter_links(["摘要","问询函","社会责任","审计","财务","风险","债券",],df,include= False)
# Titles carrying a parenthesised suffix are treated as updated versions.
# The list used to contain the same "(" twice; the second entry is now the
# full-width form (U+FF08) so both spellings are recognised.
# NOTE(review): confirm the source titles really use full-width parentheses.
_PARENS = ["(", "\uff08"]
df_orig = filter_links(_PARENS, df_all, include=False)
df_updt = filter_links(_PARENS, df_all, include=True)
# Updated entries whose title contains "取消" are discarded.
df_updt = filter_links(["取消"], df_updt, include=False)
print(df_orig)
Unnamed: 0 link \ 0 0 http://news.windin.com/ns/bulletin.php?code=2B... 2 2 http://news.windin.com/ns/bulletin.php?code=8D... 4 4 http://news.windin.com/ns/bulletin.php?code=B6... 6 6 http://news.windin.com/ns/bulletin.php?code=6B... 8 8 http://news.windin.com/ns/bulletin.php?code=F8... .. ... ... 490 490 http://news.windin.com/ns/bulletin.php?code=71... 492 492 http://news.windin.com/ns/bulletin.php?code=44... 494 494 http://news.windin.com/ns/bulletin.php?code=3a... 496 496 http://news.windin.com/ns/bulletin.php?code=1f... 498 498 http://news.windin.com/ns/bulletin.php?code=f5... f_name 0 方正证券:2020年年度报告 2 长江证券:2020年年度报告 4 西南证券:2020年年度报告 6 东吴证券:2020年年度报告 8 太平洋:2020年年度报告 .. ... 490 东北证券:2014年年度报告 492 国元证券:2014年年度报告 494 国金证券:2014年年度报告 496 兴业证券:2014年年度报告 498 华泰证券:2014年年度报告 [243 rows x 3 columns]
def sub_with_update(df_updt, df_orig):
    """Replace links of original reports with the links of their updates.

    A row of *df_updt* is considered an update of a row of *df_orig* when
    the original ``f_name`` is a substring of the updated ``f_name``. If
    several updates match the same original, the last one in *df_updt*
    wins.

    Args:
        df_updt: DataFrame of updated reports (needs ``f_name``).
        df_orig: DataFrame of original reports (needs ``f_name``).

    Returns:
        A copy of *df_orig* with the link cell of every matched row
        overwritten; *df_orig* itself is not modified.
    """
    df_newest = df_orig.copy()
    # Address the link column by name when both frames have one, so the
    # result does not depend on column order. Frames without a "link"
    # column fall back to the second-to-last position used previously.
    if "link" in df_orig.columns and "link" in df_updt.columns:
        col_orig = df_orig.columns.get_loc("link")
        col_updt = df_updt.columns.get_loc("link")
    else:
        col_orig = col_updt = -2
    updated_names = list(df_updt.f_name)
    for i, name in enumerate(df_orig.f_name):
        for j, updated_name in enumerate(updated_names):
            if name in updated_name:
                df_newest.iloc[i, col_orig] = df_updt.iloc[j, col_updt]
    return df_newest
def _short_name(title):
    """Return the company name, i.e. the text before the first colon.

    Both the ASCII and the full-width colon are accepted. Titles without a
    colon fall back to their first four characters.
    """
    head, sep, _ = title.replace("\uff1a", ":").partition(":")
    return head if sep else title[:4]


# Use the updated links where available, sort by title and add the company
# short name as its own column.
df_newest = sub_with_update(df_updt, df_orig)
df_newest.sort_values(by=["f_name"], inplace=True, ignore_index=True)
# A fixed four-character slice would keep the colon for shorter names
# such as "太平洋".
df_newest["公司简称"] = [_short_name(f) for f in df_newest.f_name]
print(df_newest)
Unnamed: 0 link \ 0 449 http://news.windin.com/ns/bulletin.php?code=32... 1 353 http://news.windin.com/ns/bulletin.php?code=50... 2 275 http://news.windin.com/ns/bulletin.php?code=2E... 3 113 http://news.windin.com/ns/bulletin.php?code=83... 4 26 http://news.windin.com/ns/bulletin.php?code=8D... .. ... ... 238 439 http://news.windin.com/ns/bulletin.php?code=cd... 239 398 http://news.windin.com/ns/bulletin.php?code=A1... 240 276 http://news.windin.com/ns/bulletin.php?code=72... 241 115 http://news.windin.com/ns/bulletin.php?code=9B... 242 2 http://news.windin.com/ns/bulletin.php?code=8D... f_name 公司简称 0 东兴证券:2015年年度报告 东兴证券 1 东兴证券:2016年年度报告 东兴证券 2 东兴证券:2017年年度报告 东兴证券 3 东兴证券:2019年年度报告 东兴证券 4 东兴证券:2020年年度报告 东兴证券 .. ... ... 238 长江证券:2015年年度报告 长江证券 239 长江证券:2016年年度报告 长江证券 240 长江证券:2017年年度报告 长江证券 241 长江证券:2019年年度报告 长江证券 242 长江证券:2020年年度报告 长江证券 [243 rows x 4 columns]
# Write one CSV per company for the ten companies with the most reports.
# value_counts() sorts by frequency, most frequent first.
counts = df_newest["公司简称"].value_counts()
# Select on the short-name column itself; a substring search over the titles
# could also pick up rows of another company whose title merely mentions
# the name.
ten_company = [
    df_newest[df_newest["公司简称"] == cn] for cn in counts.index[:10]
]
os.makedirs("10companies", exist_ok=True)
for df_com in ten_company:
    cn = df_com["公司简称"].iloc[0]
    df_com.to_csv("10companies/%s.csv" % cn)
ten_csv = os.listdir("10companies")
import re
import os
import requests
import pandas as pd
import time
import warnings

# First <a> tag of the page: group 1 is the href value, group 2 the anchor text.
_ANCHOR_RE = re.compile(r'<a href=(.*?)\s.*?>(.*?)</a>', re.DOTALL)


def get_PDF_url(url):
    """Fetch *url* and return the download link found on that page.

    Returns:
        ``(href, fname)`` built from the first ``<a>`` tag of the page, or
        an empty tuple (after emitting a warning) when the page contains
        no such tag.
    """
    r = requests.get(url)
    r.encoding = 'utf-8'
    html = r.text
    r.close()  # the HTML is already in memory, release the connection
    a = _ANCHOR_RE.search(html)  # the first <a> is the target, so search() is enough
    if a is None:
        # A bare Warning(...) expression only builds an object and prints
        # nothing; warnings.warn actually reports the problem.
        warnings.warn('没有找到下载链接。请手动检查链接:%s' % url)
        return ()
    href = a.group(1)
    fname = a.group(2).strip()
    # The first 26 characters of the response URL are used as the prefix of
    # the relative href ("http://news.windin.com/ns/" for the links shown
    # in this data set).
    href = r.url[:26] + href
    return (href, fname)


# Process each of the CSV files in the folder and collect the final download
# links of its reports.
domain = os.path.abspath(r'10companies')  # absolute path of the folder
for info in os.listdir('10companies'):
    info = os.path.join(domain, info)  # full path of the current CSV file
    df = pd.read_csv(info)
    links = df["link"]
    hrefs = []
    fnames = []
    for link in links:
        result = get_PDF_url(link)
        if not result:
            # No download link on that page: skip it instead of failing on
            # the tuple unpacking below.
            continue
        href, fname = result
        hrefs.append(href)
        fnames.append(fname)
        time.sleep(0)
    df_final_links = pd.DataFrame({'href': hrefs,
                                   'f_name': fnames})
    # Company name = file name without extension (works for names of any
    # length, not just four characters).
    ste = os.path.splitext(os.path.basename(info))[0]
    # one CSV of report links per company
    df_final_links.to_csv("final_links_" + ste + ".csv")
import os
import requests
import pandas as pd
import time
# Download every report listed in the per-company "final_links_*.csv" files.
# NOTE(review): the files are written to the current working directory under
# their listed names; the later extraction step lists a folder named after
# the company, so they presumably have to be moved there — confirm.
domain = os.path.abspath(r'10companies')  # absolute path of the folder
for info in os.listdir('10companies'):
    info = os.path.join(domain, info)  # full path of the current CSV file
    # Company name = file name without extension (any name length).
    ste = os.path.splitext(os.path.basename(info))[0]
    df_final_links = pd.read_csv("final_links_" + ste + ".csv")
    pairs = zip(df_final_links["href"], df_final_links["f_name"])
    for href, f_name in pairs:
        r = requests.get(href, allow_redirects=True)
        try:
            # "with" guarantees the file handle is flushed and closed.
            with open('%s' % f_name, 'wb') as out_file:
                out_file.write(r.content)
        finally:
            r.close()
        time.sleep(0)
import fitz # pip install pymupdf
import re
import pandas as pd
import os
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
# Locates the "...主要...数据..." section of a report, up to and including
# the text "经营活动产生的".
_SECTION_RE = re.compile(
    r'(?<=\n)\D、\s*\D*?主要\D*?数据\D*?\s*(?=\n)(.*?)经营活动产生的',
    re.DOTALL,
)
# One numeric field followed by a line break.
_NUMBER_LINE = r"([0-9,.%\- ]*?)\n"
# A line starting with "营业", optional further non-digit lines, then four
# numeric lines.
_REVENUE_RE = re.compile(r"(?<=\n)营业(\D*?\n)+%s" % (_NUMBER_LINE * 4))


def getText(pdf):
    """Return the text of all pages of the PDF at path *pdf*."""
    doc = fitz.open(pdf)
    try:
        # Page.getText was removed from PyMuPDF; get_text is its replacement.
        text = ''.join(page.get_text() for page in doc)
    finally:
        doc.close()
    # Some numbers are not followed by a line break, which makes the
    # non-greedy per-line matching fail; normalise the separators first.
    text = text.replace(" ", " \n")
    text = text.replace("\n\n", "\n")
    return text


def get_content(pdf):
    """Return the key-data section of the report as a string.

    Raises:
        ValueError: if the section cannot be located in the PDF text.
    """
    text = getText(pdf)
    match = _SECTION_RE.search(text)
    if match is None:
        raise ValueError("key-data section not found in %s" % pdf)
    return match.group(0)


def parse_data_line(pdf):
    """Return the matched revenue block of the report (label plus numbers).

    Raises:
        ValueError: if no revenue line is found in the key-data section.
    """
    content = get_content(pdf)
    # Search the section of THIS report; this used to read a global named
    # content_1, which is undefined here or left over from another run.
    lines = _REVENUE_RE.search(content)
    if lines is None:
        raise ValueError("revenue line not found in %s" % pdf)
    return lines[0]


df = pd.DataFrame()  # start from an empty table
i = 0
for info in os.listdir('10companies'):
    # Company name = CSV file name without extension (any name length).
    ste = os.path.splitext(info)[0]
    filenames = os.listdir(ste)  # PDF file names in the company's folder
    sale = []
    for pdf in filenames:
        # A bare file name cannot be opened; join it with the folder that
        # was just listed instead of a machine-specific absolute path.
        x = os.path.join(ste, pdf)
        sale_gain = parse_data_line(x)
        sale_gain = sale_gain.split("\n")  # split the block into lines
        sale_gain = sale_gain[1]  # second line: the operating revenue
        sale.append(sale_gain)
    # One column per company. DataFrame.insert requires every company to
    # contribute the same number of values.
    df.insert(i, ste, sale)
    i = i + 1
print(df)
df.to_csv("9companies")  # save the collected table as a CSV file
东北证券 中信证券 光大证券 兴业证券 \ 0 5,184,163,900.03 5,184,163,900.03 5,184,163,900.03 5,184,163,900.03 1 5,184,163,900.03 5,184,163,900.03 5,184,163,900.03 5,184,163,900.03 2 5,184,163,900.03 5,184,163,900.03 5,184,163,900.03 5,184,163,900.03 3 5,184,163,900.03 5,184,163,900.03 5,184,163,900.03 5,184,163,900.03 4 5,184,163,900.03 5,184,163,900.03 5,184,163,900.03 5,184,163,900.03 5 5,184,163,900.03 5,184,163,900.03 5,184,163,900.03 5,184,163,900.03 6 5,184,163,900.03 5,184,163,900.03 5,184,163,900.03 5,184,163,900.03 国信证券 国元证券 国海证券 招商证券 \ 0 5,184,163,900.03 5,184,163,900.03 5,184,163,900.03 5,184,163,900.03 1 5,184,163,900.03 5,184,163,900.03 5,184,163,900.03 5,184,163,900.03 2 5,184,163,900.03 5,184,163,900.03 5,184,163,900.03 5,184,163,900.03 3 5,184,163,900.03 5,184,163,900.03 5,184,163,900.03 5,184,163,900.03 4 5,184,163,900.03 5,184,163,900.03 5,184,163,900.03 5,184,163,900.03 5 5,184,163,900.03 5,184,163,900.03 5,184,163,900.03 5,184,163,900.03 6 5,184,163,900.03 5,184,163,900.03 5,184,163,900.03 5,184,163,900.03 西南证券 0 5,184,163,900.03 1 5,184,163,900.03 2 5,184,163,900.03 3 5,184,163,900.03 4 5,184,163,900.03 5 5,184,163,900.03 6 5,184,163,900.03
import fitz # pip install pymupdf
import re
import pandas as pd
import os
import csv
# Locates the "...主要...数据..." section of a report, up to and including
# the text "经营活动产生的".
_XIBU_SECTION_RE = re.compile(
    r'(?<=\n)\D、\s*\D*?主要\D*?数据\D*?\s*(?=\n)(.*?)经营活动产生的',
    re.DOTALL,
)
# A line starting with "营业", optional further non-digit lines, then four
# numeric lines.
_XIBU_NUMBER_LINE = r"([0-9,.%\- ]*?)\n"
_XIBU_REVENUE_RE = re.compile(
    r"(?<=\n)营业(\D*?\n)+%s" % (_XIBU_NUMBER_LINE * 4)
)


def getText(pdf):
    """Return the text of all pages of the PDF at path *pdf*."""
    doc = fitz.open(pdf)
    try:
        # Page.getText was removed from PyMuPDF; get_text is its replacement.
        return ''.join(page.get_text() for page in doc)
    finally:
        doc.close()


def get_content(pdf):
    """Return the regex match of the key-data section, or None if absent."""
    text = getText(pdf)
    return _XIBU_SECTION_RE.search(text)


def parse_data_line(pdf):
    """Return the matched revenue block of the report (label plus numbers).

    Raises:
        ValueError: if the key-data section or the revenue line is missing.
    """
    content = get_content(pdf)
    if content is None:
        raise ValueError("key-data section not found in %s" % pdf)
    lines = _XIBU_REVENUE_RE.search(content[0])
    if lines is None:
        raise ValueError("revenue line not found in %s" % pdf)
    return lines[0]


csv_data = pd.read_csv("9companies")
csv_df = pd.DataFrame(csv_data)
csv_df_new = csv_df.iloc[:7, 1:10]  # revenue table of the first nine companies
ste = "西部证券"
filenames = os.listdir(ste)
df = pd.DataFrame()
i = 0
sale = []
for pdf in filenames:
    # Join with the folder that was just listed instead of a
    # machine-specific absolute path.
    x = os.path.join(ste, pdf)
    # Each PDF is parsed once; the result is split into lines and the
    # second line is kept as the revenue figure.
    sale_gain = parse_data_line(x)
    sale_gain = sale_gain.split("\n")
    sale_gain = sale_gain[1]
    sale.append(sale_gain)
# table with this company's revenue for 2014 and 2016-2020
df.insert(i, ste, sale)
print(df)
Deprecation: 'getText' removed from class 'Page' after v1.19 - use 'get_text'. mupdf: kid not found in parent's kids array mupdf: kid not found in parent's kids array mupdf: kid not found in parent's kids array mupdf: kid not found in parent's kids array mupdf: invalid page object mupdf: invalid page object mupdf: invalid page object mupdf: invalid page object mupdf: invalid page object mupdf: invalid page object mupdf: invalid page object mupdf: invalid page object mupdf: invalid page object mupdf: invalid page object mupdf: invalid page object mupdf: invalid page object
西部证券 0 1,938,470,480.53 1 3,406,329,331.96 2 3,169,944,961.76 3 2,237,341,729.03 4 3,680,544,587.40 5 5,184,163,900.03
def df_lastnew(df):
    """Insert the hard-coded figure "5640878792.18" after the row labelled 0.

    The value is placed in the "西部证券" column, between the rows up to
    label 0 and the rows from label 1 on.

    Args:
        df: DataFrame with a default integer index.

    Returns:
        A new DataFrame, one row longer, with a fresh 0..n-1 index.
    """
    df1 = df.loc[:0]  # label-based: everything up to and including label 0
    df2 = df.loc[1:]
    df3 = pd.DataFrame({"西部证券": ["5640878792.18"]})
    # DataFrame.append was removed in pandas 2.0; concat is the equivalent.
    return pd.concat([df1, df3, df2], ignore_index=True)
# Complete the 西部证券 column, then put it next to the nine other companies
# so that one table holds the revenue of all companies.
df_new = df_lastnew(df)
frames_side_by_side = [csv_df_new, df_new]
df_want = pd.concat(frames_side_by_side, axis=1)
print(df_want)
# persist the combined table under the name "10companies_data"
df_want.to_csv("10companies_data")
东北证券 中信证券 光大证券 \ 0 3,090,984,262.41 29,197,531,133.19 6,601,422,929.86 1 6,745,760,224.97 38,001,923,489.02 16,571,087,246.74 2 4,481,628,728.45 43,291,634,080.53 9,164,639,102.50 3 4,926,111,998.63 37,220,708,075.49 9,838,147,762.07 4 6,780,105,834.67 43,139,697,642.01 7,712,277,101.82 5 7,968,795,586.85 56,013,436,032.55 10,057,362,378.64 6 6,609,613,343.83 54,382,730,241.56 15,866,343,425.84 兴业证券 国信证券 国元证券 \ 0 5,609,064,896.32 11,792,322,619.69 3,486,036,145.60 1 11,540,612,657.75 29,139,131,599.01 5,773,382,071.47 2 7,589,066,883.90 12,748,903,313.78 3,375,520,490.03 3 8,818,781,467.80 1,192,361.02 3,510,702,162.26 4 6,499,373,437.14 1,003,093.19 2,537,907,348.38 5 14,249,535,861.49 1,409,291.46 3,198,808,368.09 6 17,579,687,208.80 1,878,407.12 4,528,625,617.88 国海证券 招商证券 西南证券 \ 0 2,544,981,928.34 11,002,468,274.67 3,674,829,197.88 1 4,959,157,199.17 11,695,453,558.82 8,496,799,180.16 2 3,837,581,191.88 13,353,213,641.86 3,631,659,620.90 3 1,817,078,708.13 11,321,611,555.03 3,060,764,762.10 4 2,122,602,077.80 18,708,369,944.73 2,744,154,393.47 5 3,560,208,077.82 24,277,670,240.59 3,488,837,437.18 6 4,482,015,217.40 25,291,794,057.95 3,169,571,453.97 西部证券 0 1,938,470,480.53 1 5640878792.18 2 3,406,329,331.96 3 3,169,944,961.76 4 2,237,341,729.03 5 3,680,544,587.40 6 5,184,163,900.03
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import pandas as pd
import csv
from matplotlib.pyplot import MultipleLocator
import numpy as np
# Load the combined revenue table and expose it both row-wise and column-wise.
csv_data1 = pd.read_csv("10companies_data")
csv_df1 = pd.DataFrame(csv_data1)
# first 7 rows, columns 1..10: drops the index column written by to_csv
csv_df_new1 = csv_df1.iloc[:7, 1:11]
list_row = csv_df_new1.values.tolist()  # one list per table row
list_name = list(csv_df_new1)  # the column labels of the table
columns = csv_df_new1.columns
# one list per table column
list_columns = [csv_df_new1[label].values.tolist() for label in columns]
for row_values in list_row:
    print(row_values)
    print("\n")
['3,090,984,262.41', '29,197,531,133.19 ', '6,601,422,929.86 ', '5,609,064,896.32 ', '11,792,322,619.69 ', '3,486,036,145.60', '2,544,981,928.34', '11,002,468,274.67 ', '3,674,829,197.88 ', '1,938,470,480.53 '] ['6,745,760,224.97 ', '38,001,923,489.02 ', '16,571,087,246.74 ', '11,540,612,657.75 ', '29,139,131,599.01 ', '5,773,382,071.47', '4,959,157,199.17 ', '11,695,453,558.82 ', '8,496,799,180.16 ', '5640878792.18'] ['4,481,628,728.45 ', '43,291,634,080.53', '9,164,639,102.50 ', '7,589,066,883.90 ', '12,748,903,313.78', '3,375,520,490.03', '3,837,581,191.88 ', '13,353,213,641.86 ', '3,631,659,620.90 ', ' 3,406,329,331.96 '] ['4,926,111,998.63 ', '37,220,708,075.49 ', '9,838,147,762.07 ', '8,818,781,467.80 ', '1,192,361.02 ', '3,510,702,162.26', '1,817,078,708.13 ', '11,321,611,555.03 ', '3,060,764,762.10 ', '3,169,944,961.76 '] ['6,780,105,834.67 ', '43,139,697,642.01 ', '7,712,277,101.82 ', '6,499,373,437.14 ', '1,003,093.19 ', '2,537,907,348.38', '2,122,602,077.80 ', '18,708,369,944.73 ', '2,744,154,393.47 ', '2,237,341,729.03 '] ['7,968,795,586.85 ', '56,013,436,032.55 ', '10,057,362,378.64 ', '14,249,535,861.49 ', '1,409,291.46 ', '3,198,808,368.09', '3,560,208,077.82 ', '24,277,670,240.59 ', '3,488,837,437.18 ', '3,680,544,587.40 '] ['6,609,613,343.83 ', '54,382,730,241.56 ', '15,866,343,425.84 ', '17,579,687,208.80 ', '1,878,407.12 ', '4,528,625,617.88', '4,482,015,217.40 ', '25,291,794,057.95', '3,169,571,453.97 ', '5,184,163,900.03 ']
# Show the column labels collected from the revenue table.
print(list_name)
['东北证券', '中信证券', '光大证券', '兴业证券', '国信证券', '国元证券', '国海证券', '招商证券', '西南证券', '西部证券']
# Print the values of every table column, each followed by an empty line.
for column_values in list_columns:
    print(column_values)
    print("\n")
['3,090,984,262.41', '6,745,760,224.97 ', '4,481,628,728.45 ', '4,926,111,998.63 ', '6,780,105,834.67 ', '7,968,795,586.85 ', '6,609,613,343.83 '] ['29,197,531,133.19 ', '38,001,923,489.02 ', '43,291,634,080.53', '37,220,708,075.49 ', '43,139,697,642.01 ', '56,013,436,032.55 ', '54,382,730,241.56 '] ['6,601,422,929.86 ', '16,571,087,246.74 ', '9,164,639,102.50 ', '9,838,147,762.07 ', '7,712,277,101.82 ', '10,057,362,378.64 ', '15,866,343,425.84 '] ['5,609,064,896.32 ', '11,540,612,657.75 ', '7,589,066,883.90 ', '8,818,781,467.80 ', '6,499,373,437.14 ', '14,249,535,861.49 ', '17,579,687,208.80 '] ['11,792,322,619.69 ', '29,139,131,599.01 ', '12,748,903,313.78', '1,192,361.02 ', '1,003,093.19 ', '1,409,291.46 ', '1,878,407.12 '] ['3,486,036,145.60', '5,773,382,071.47', '3,375,520,490.03', '3,510,702,162.26', '2,537,907,348.38', '3,198,808,368.09', '4,528,625,617.88'] ['2,544,981,928.34', '4,959,157,199.17 ', '3,837,581,191.88 ', '1,817,078,708.13 ', '2,122,602,077.80 ', '3,560,208,077.82 ', '4,482,015,217.40 '] ['11,002,468,274.67 ', '11,695,453,558.82 ', '13,353,213,641.86 ', '11,321,611,555.03 ', '18,708,369,944.73 ', '24,277,670,240.59 ', '25,291,794,057.95'] ['3,674,829,197.88 ', '8,496,799,180.16 ', '3,631,659,620.90 ', '3,060,764,762.10 ', '2,744,154,393.47 ', '3,488,837,437.18 ', '3,169,571,453.97 '] ['1,938,470,480.53 ', '5640878792.18', ' 3,406,329,331.96 ', '3,169,944,961.76 ', '2,237,341,729.03 ', '3,680,544,587.40 ', '5,184,163,900.03 ']
def change_type(list_x):
    """Convert a table of formatted number strings to floats in units of 1e8.

    Each cell has its thousands separators (",") removed, is parsed as a
    float, divided by 10**8 so it is easier to show in a chart, and rounded
    to two decimals.

    Args:
        list_x: List of rows, each a list of strings such as
            "3,090,984,262.41". Surrounding whitespace is tolerated.

    Returns:
        A new list of lists of floats with the same shape as *list_x*.
    """
    # Every row is walked over its own length. The row length used to be
    # taken from list_x[1], which fails for a single-row table and
    # mis-sizes ragged ones.
    return [
        [round(float(cell.replace(",", "")) / 10**8, 2) for cell in row]
        for row in list_x
    ]
# Numeric versions of the row-wise and column-wise tables.
list_row_1 = change_type(list_row)
list_columns_1 = change_type(list_columns)
for converted_row in list_row_1:
    print(converted_row)
[30.91, 291.98, 66.01, 56.09, 117.92, 34.86, 25.45, 110.02, 36.75, 19.38] [67.46, 380.02, 165.71, 115.41, 291.39, 57.73, 49.59, 116.95, 84.97, 56.41] [44.82, 432.92, 91.65, 75.89, 127.49, 33.76, 38.38, 133.53, 36.32, 34.06] [49.26, 372.21, 98.38, 88.19, 119.24, 35.11, 18.17, 113.22, 30.61, 31.7] [67.8, 431.4, 77.12, 64.99, 100.31, 25.38, 21.23, 187.08, 27.44, 22.37] [79.69, 560.13, 100.57, 142.5, 140.93, 31.99, 35.6, 242.78, 34.89, 36.81] [66.1, 543.83, 158.66, 175.8, 187.84, 45.29, 44.82, 252.92, 31.7, 51.84]
# Print the converted values column by column.
for converted_column in list_columns_1:
    print(converted_column)
[30.91, 67.46, 44.82, 49.26, 67.8, 79.69, 66.1] [291.98, 380.02, 432.92, 372.21, 431.4, 560.13, 543.83] [66.01, 165.71, 91.65, 98.38, 77.12, 100.57, 158.66] [56.09, 115.41, 75.89, 88.19, 64.99, 142.5, 175.8] [117.92, 291.39, 127.49, 119.24, 100.31, 140.93, 187.84] [34.86, 57.73, 33.76, 35.11, 25.38, 31.99, 45.29] [25.45, 49.59, 38.38, 18.17, 21.23, 35.6, 44.82] [110.02, 116.95, 133.53, 113.22, 187.08, 242.78, 252.92] [36.75, 84.97, 36.32, 30.61, 27.44, 34.89, 31.7] [19.38, 56.41, 34.06, 31.7, 22.37, 36.81, 51.84]
# Font able to render the Chinese chart labels. Raw string: the former
# literal contained "\\\F", i.e. an invalid "\F" escape and a doubled
# path separator.
zhfont1 = matplotlib.font_manager.FontProperties(fname=r"C:\Windows\Fonts\SimHei.TTF")
# Tick labels for the year axis.
name_list = ["2014","2015","2016","2017","2018","2019","2020"]
def x_ticks(list_columns, list_name):
    """Draw, save and show a bar chart of one company's revenue per year.

    Args:
        list_columns: Revenue values of one company, one per entry of the
            module-level ``name_list`` (used as tick labels).
        list_name: Company name; used in the title and as the PNG file name.
    """
    positions = range(len(list_columns))
    # An explicit colour list replaces the deprecated "rgb" string sequence.
    rects = plt.bar(positions, list_columns, color=["r", "g", "b"],
                    width=1, tick_label=name_list)
    plt.title(list_name+"2014——2020营业收入对比", fontproperties=zhfont1)
    plt.xlabel("年份", fontproperties=zhfont1)
    plt.ylabel("营业收入(亿元)", fontproperties=zhfont1)
    # Write each bar's value just above it.
    for rect in rects:
        height = rect.get_height()
        plt.text(rect.get_x() + rect.get_width() / 2, height, str(height),
                 size=10, ha="center", va="bottom")
    plt.savefig(list_name + ".png", dpi=600)
    plt.show()
# One chart per company: pair every converted column with its company name.
for company_values, company in zip(list_columns_1, list_name):
    x_ticks(company_values, company)
<ipython-input-11-75ed3f2431c4>:5: MatplotlibDeprecationWarning: Using a string of single character colors as a color sequence is deprecated since 3.2 and will be removed two minor releases later. Use an explicit list instead. rects = plt.bar(range(len(list_columns)),num_list,color="rgb",width = 1,tick_label=name_list)
# Keep only the first two characters of every company name.
list_name_1 = [full_name[0:2] for full_name in list_name]
def y_ticks(list_row, name_list):
    """Draw, save and show a horizontal bar chart comparing the companies.

    Args:
        list_row: Revenue values of all companies for one year, in the
            order of the module-level ``list_name_1`` labels.
        name_list: Label of that year; used in the title and as the PNG
            file name.
    """
    # An explicit colour list replaces the deprecated "rgby" string sequence.
    rects = plt.barh(range(len(list_row)), list_row,
                     color=["r", "g", "b", "y"])
    # One tick per label instead of a hard-coded count of 10.
    index = np.arange(len(list_name_1))
    plt.yticks(index, list_name_1, fontproperties=zhfont1)
    plt.title(name_list+"不同公司营业收入对比", fontproperties=zhfont1)
    plt.xlabel("营业收入(亿元)", fontproperties=zhfont1)
    plt.ylabel("公司名称", fontproperties=zhfont1)
    # Write each bar's value right of its end.
    for rect in rects:
        w = rect.get_width()
        plt.text(w, rect.get_y() + rect.get_height() / 2, str(w),
                 size=10, ha='left', va='center')
    plt.savefig(name_list + ".png", dpi=600)
    plt.show()
# One chart per year: pair every converted row with its year label.
for year_values, year in zip(list_row_1, name_list):
    y_ticks(year_values, year)
<ipython-input-12-0a4af112d88f>:8: MatplotlibDeprecationWarning: Using a string of single character colors as a color sequence is deprecated since 3.2 and will be removed two minor releases later. Use an explicit list instead. rects = plt.barh(range(len(list_row)),num_list_1,color='rgby')