import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif']=['SimHei']
plt.rcParams['axes.unicode_minus']=False
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import math as m
import numpy as np
import pdfplumber as pdf
import re
import os
# Above: set up the environment (imports and matplotlib font settings)
def _clean_number(cell):
    """Parse a table cell such as '1,234.50' or '5.5%' into a float."""
    return float(cell.replace(',', '').replace('%', ''))


def shuju(f):
    """Extract key financial indicators for 2017-2019 from an annual report.

    The last page whose text contains the heading '主要会计数据和财务指标'
    is located, and the tables on that page plus the following page (when
    there is one) are scanned row by row.  A row is used when its first cell
    contains one of the indicator keywords; cells 1, 2 and 4 of that row are
    parsed as the 2019, 2018 and 2017 values, after stripping thousands
    separators and percent signs.  When several rows match the same keyword,
    the last one wins.

    Args:
        f: An opened PDF object exposing ``pages``; every page must provide
            ``extract_text()``, ``extract_tables()`` and ``page_number``
            (the pdfplumber page interface).

    Returns:
        pandas.DataFrame indexed by the abbreviated indicator keywords with
        columns 2017, 2018 and 2019.  Indicators that are not found stay NaN.

    Raises:
        ValueError: If no page contains the heading, or a matched cell does
            not hold a number.
        IndexError: If a matched row has fewer than five cells.
    """
    # Abbreviated keywords: only the leading part of each label is matched.
    # NOTE(review): presumably because long labels wrap inside the table
    # cell -- confirm against real reports.
    keywords = ['营业收入', '经营活动产生', '基本每股收益', '稀释每股收益', '加权平均净资']
    result = pd.DataFrame(index=keywords, columns=[2017, 2018, 2019])

    heading_page = None
    for page in f.pages:
        # extract_text() can return None for a page without any text.
        if re.search('主要会计数据和财务指标', page.extract_text() or ''):
            heading_page = page
    if heading_page is None:
        raise ValueError('no page contains the heading 主要会计数据和财务指标')

    tables = list(heading_page.extract_tables())
    # page_number counts from 1 in pdfplumber, so used as a list index it
    # addresses the page after the heading page; skip it when the heading
    # is on the last page.
    if heading_page.page_number < len(f.pages):
        tables = tables + f.pages[heading_page.page_number].extract_tables()

    for keyword in keywords:
        for table in tables:
            for row in table:
                # NOTE(review): this edits the row in place and every keyword
                # pass revisits the same rows, so a row with several empty
                # cells loses one more of them per pass.  Kept as is because
                # the column positions used below depend on it; check real
                # reports before changing.
                if row.count(''):
                    row.remove('')
                if not row or not row[0]:
                    continue
                if re.search(keyword, row[0]):
                    # Parse all three cells first so that a malformed row
                    # never leaves a partially updated indicator behind.
                    value_2017 = _clean_number(row[4])
                    value_2018 = _clean_number(row[2])
                    value_2019 = _clean_number(row[1])
                    result.loc[keyword, 2017] = value_2017
                    result.loc[keyword, 2018] = value_2018
                    result.loc[keyword, 2019] = value_2019
    return result
# Above: define the function that extracts data from an annual report
# Companies to compare; each annual report is read from "<company>.pdf".
name = ['炼石航空', '航天科技', '雷科防务', '中航重机']
# Full indicator labels, listed in the same order as the abbreviated
# keywords that shuju() uses for its row index.
d = ['营业收入', '经营活动产生的现金流量净额', '基本每股收益', '稀释每股收益', '加权平均净资产收益率']
# One DataFrame of extracted indicators per company, in the order of `name`.
e = []
for company in name:
    # Raw string: the backslashes in the Windows path must stay literal
    # instead of being read as (invalid) escape sequences.
    # The context manager closes each PDF once its data has been extracted.
    with pdf.open(r'C:/Users\pc\Desktop\新建文件夹 (3)/' + company + '.pdf') as report:
        e.append(shuju(report))
# Above: extract the data from each company's report
# One line chart per company and indicator, showing the 2017-2019 values.
# Iterating over the actual pairs avoids a hard-coded company count.
for company, table in zip(name, e):
    # Replace the row labels with the full indicator names from `d`
    # (assigned by position, so both lists must be in the same order).
    table.index = d
    for indicator in d:
        plt.figure()
        plt.plot(['2017', '2018', '2019'], table.loc[indicator, :])
        plt.title(company + ' ' + indicator)

# One bar chart per indicator, comparing the companies' 2019 values.
for indicator in d:
    values_2019 = [table.loc[indicator, 2019] for table in e]
    plt.figure()
    plt.bar(name, values_2019)
    plt.title('2019年各公司' + indicator)
# Above: draw the output figures