In [1]:
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif']=['SimHei']
plt.rcParams['axes.unicode_minus']=False
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import math as m
import numpy as np
import pdfplumber as pdf
import re
import os

搭建环境

In [2]:
def shuju(f):
    d=['营业收入','经营活动产生','基本每股收益','稀释每股收益','加权平均净资']
    c=pd.DataFrame(index=d,columns=[2017,2018,2019])
    for i in f.pages:
        if re.search('主要会计数据和财务指标',i.extract_text()):
            a=i
    b=f.pages[a.page_number]
    x=a.extract_tables()+b.extract_tables()
    for i in d:
        for j in x:
            for s in j:
                if s.count(''):
                    s.remove('')
                if s[0]:
                    if bool(re.search(i,s[0])):
                        n=s[4].replace(',', '')
                        n=n.replace('%', '')
                        o=s[2].replace(',', '')
                        o=o.replace('%', '')
                        l=s[1].replace(',', '')
                        l=l.replace('%', '')
                        c.loc[i,2017]=float(n)
                        c.loc[i,2018]=float(o)
                        c.loc[i,2019]=float(l)
    return(c)

定义抽取年报数据的函数

In [6]:
name=['炼石航空','航天科技','雷科防务','中航重机']
d=['营业收入','经营活动产生的现金流量净额','基本每股收益','稀释每股收益','加权平均净资产收益率']
e=[]
for i in name:
    p=pdf.open('C:/Users\pc\Desktop\新建文件夹 (3)/'+i+'.pdf')
    e.append(shuju(p))

抽取数据

In [7]:
for i in range(4):
    e[i].index=d
    for b in e[i].index.values:
        plt.figure()
        plt.plot(['2017','2018','2019'],e[i].loc[b,:])
        plt.title(name[i]+' '+b)
for i in d:
    x=[]
    for j in e:
        x.append(j.loc[i,2019])
    plt.figure()
    plt.bar(name,x)
    plt.title('2019年各公司'+i)
<ipython-input-7-8dded5b30adf>:11: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  plt.figure()

输出图像