王生靓的实验报告;

目录

Ⅰ.正式作业代码

  1. 年报获取及下载。
  2. 提取年报中财务数据。
  3. 利用获取的财务数据绘图。

Ⅱ.实验结果分析与理解

Ⅲ.附录

1.实验心得
2.实验数据

Ⅰ.正式作业代码;

根据分配的行业进行年报获取与下载

A.定义函数

    
    import pdfplumber
    import pandas as pd
    import re
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.common.keys import Keys
    from selenium.webdriver.common.action_chains import ActionChains
    import time
    import os
    import requests
    from bs4 import BeautifulSoup
    
    #定义所需函数
    '''
    根据学生姓名匹配被分配到的行业深市上市的公司
    '''
    def InputStu():
        """Prompt for student names on stdin and return them as a list.

        The typed line is split on whitespace, so several names can be
        entered at once separated by spaces.
        """
        typed = input('请输入姓名:')
        return str(typed).split()
    
    def Match(Namelist,assignment):
        """Return the industry codes assigned to the given students.

        Args:
            Namelist: Iterable of student names.
            assignment: DataFrame with at least the columns '完成人'
                (student name) and '行业' (industry code).

        Returns:
            List of '行业' values, grouped in the order of ``Namelist``.
            An empty ``Namelist`` yields an empty list.
        """
        # Collect the per-name slices first and concatenate once, instead of
        # re-copying a growing frame on every iteration.
        frames = [assignment.loc[assignment['完成人'] == name] for name in Namelist]
        if not frames:
            # No names given: nothing to look up (avoids a KeyError on '行业').
            return []
        return pd.concat(frames)['行业'].tolist()
        
    def Get_sz(data):
        """Select the rows of ``data`` whose stock code has a Shenzhen prefix.

        Args:
            data: Table as a list of rows. Row 0 supplies the column names
                and cell 3 of every row is tested as the stock code string.

        Returns:
            DataFrame of the matching rows in their original order, with
            the first column dropped.
        """
        prefixes = ('200', '300', '301', '00', '080')
        # str.startswith accepts a tuple, so each row is tested once and can
        # never be emitted twice even if two prefixes overlap.
        rows = [row for row in data if row[3].startswith(prefixes)]
        return pd.DataFrame(rows, columns=data[0]).iloc[:, 1:]
    
    def Get_sh(data):
        """Select the rows of ``data`` whose stock code starts with '6'.

        Row 0 of ``data`` supplies the column names and cell 3 of every row
        is tested as the code. The first column is dropped from the result.
        """
        header = data[0]
        kept = []
        for record in data:
            if record[3].startswith('6'):
                kept.append(record)
        frame = pd.DataFrame(kept, columns=header)
        return frame.iloc[:, 1:]
     
    def Getcompany(matched,df):
        """Return the rows of ``df`` that belong to the first matched industry.

        Args:
            matched: Sequence of industry codes; only ``matched[0]`` is used.
            df: DataFrame with a '行业大类代码' column.

        Returns:
            The rows of ``df`` whose '行业大类代码' equals ``matched[0]``.

        Raises:
            IndexError: If ``matched`` is empty.
        """
        # The original pre-assigned an empty DataFrame that was immediately
        # overwritten; that dead store is gone.
        return df.loc[df['行业大类代码'] == matched[0]]
    
    def Clean(lst):
        """Strip every '*' from each string of ``lst`` in place.

        The same list object is returned after being modified.
        """
        for position, text in enumerate(lst):
            lst[position] = text.replace('*', '')
        return lst
    
    '''
    利用selenium爬取所需公司年报
    '''
    
    def InputTime(start,end):
        """Type a date range into the page's two date inputs.

        Uses the module-level ``browser``. Both inputs are located first;
        ``start`` goes into the 'input-left' element and ``end`` followed by
        RETURN goes into the 'input-right' element.
        """
        left_box, right_box = [
            browser.find_element(By.CLASS_NAME, css_class)
            for css_class in ('input-left', 'input-right')
        ]
        left_box.send_keys(start)
        right_box.send_keys(end + Keys.RETURN)
    
    
    def SelectReport(kind):
        """Open the announcement-category dropdown and click one report type.

        ``kind`` 1..4 selects first-quarter, half-year, third-quarter or
        annual report respectively. Any other value only opens the dropdown.
        Uses the module-level ``browser``.
        """
        browser.find_element(By.LINK_TEXT,'请选择公告类别').click()
        choices = (
            (1, '一季度报告'),
            (2, '半年报告'),
            (3, '三季度报告'),
            (4, '年度报告'),
        )
        for number, link_text in choices:
            if kind == number:
                browser.find_element(By.LINK_TEXT, link_text).click()
                break
    
    def SearchCompany(name):
        """Enter ``name`` into the page's search box and submit it.

        Uses the module-level ``browser``. A 0.2 s pause separates typing
        the text from pressing RETURN.
        """
        box = browser.find_element(By.ID, 'input_code')
        box.send_keys(name)
        time.sleep(0.2)
        box.send_keys(Keys.RETURN)
    
    
    def Clearicon():
        """Click the last 'icon-remove' element on the page.

        Used to clear the previously selected stock before the next search.
        Uses the module-level ``browser``.
        """
        remove_icons = browser.find_elements(By.CLASS_NAME,'icon-remove')
        remove_icons[-1].click()
    
    def Clickonblank():
        """Move the pointer by (200, 100) and click there.

        Intended as a click on a blank area of the page. Uses the
        module-level ``browser``.
        """
        actions = ActionChains(browser)
        actions.move_by_offset(200, 100)
        actions.click()
        actions.perform()
       
    
        
    def Save(filename,content):
        """Write ``content`` to ``<filename>.html`` encoded as UTF-8.

        Args:
            filename: Target path without the '.html' suffix.
            content: Text to write; an existing file is overwritten.
        """
        # A context manager guarantees the handle is flushed and closed; the
        # original left closing to the garbage collector.
        with open('%s.html' % filename, 'w', encoding='utf-8') as f:
            f.write(content)
    
    
    '''
    解析html获取年报表格
    '''
    class DisclosureTable():
        """Parse the search-result table of the SZSE periodic-report page.

        The instance is built from the table's inner HTML. ``get_data``
        returns one row per announcement with code, short name, title,
        download link, detail-page link and announcement time.

        NOTE(review): in the file as found, every pattern below had been
        reduced to '(.*?)' (the HTML tags were missing), which makes
        ``get_link`` fail on ``group(2)``. The tag patterns are
        reconstructed from the variable names (p_a, p_span, trs, tds,
        attachpath, href) -- confirm against the live page markup.
        """

        # Compiled once for the class; DOTALL lets a cell span several lines.
        _P_A = re.compile(r'<a\b[^>]*>(.*?)</a>', re.DOTALL)
        _P_SPAN = re.compile(r'<span\b[^>]*>(.*?)</span>', re.DOTALL)
        _P_TR = re.compile(r'<tr\b[^>]*>(.*?)</tr>', re.DOTALL)
        _P_TD = re.compile(r'<td\b[^>]*>(.*?)</td>', re.DOTALL)
        _P_LINK = re.compile(
            r'<a\b.*?attachpath="(.*?)".*?href="(.*?)".*?<span\b[^>]*>(.*?)</span>',
            re.DOTALL)

        def __init__(self, innerHTML):
            self.html = innerHTML
            self.prefix = 'https://disc.szse.cn/download'
            self.prefix_href = 'https://www.szse.cn/'
            # Split the raw HTML into per-row cell strings right away.
            self.txt_to_df()

        def get_code(self, txt):
            """Return the stripped text of the first <a> element in ``txt``."""
            return self._P_A.search(txt).group(1).strip()

        def get_time(self, txt):
            """Return the stripped text of the first <span> element in ``txt``."""
            return self._P_SPAN.search(txt).group(1).strip()

        def txt_to_df(self):
            """Store the table's raw cell HTML in ``self.df_txt``.

            The first <tr> is skipped (treated as the header row). Each
            remaining row must provide at least four <td> cells.
            """
            trs = self._P_TR.findall(self.html)
            tds = [self._P_TD.findall(tr) for tr in trs[1:]]
            self.df_txt = pd.DataFrame({'证券代码': [td[0] for td in tds],
                                        '简称': [td[1] for td in tds],
                                        '公告标题': [td[2] for td in tds],
                                        '公告时间': [td[3] for td in tds]})

        def get_link(self, txt):
            """Return ``[attachpath, href, title]`` parsed from a title cell."""
            matchObj = self._P_LINK.search(txt)
            attachpath = matchObj.group(1).strip()
            href = matchObj.group(2).strip()
            title = matchObj.group(3).strip()
            return [attachpath, href, title]

        def get_data(self):
            """Build, store in ``self.df_data`` and return the parsed table.

            'attachpath' and 'href' are prefixed with ``self.prefix`` and
            ``self.prefix_href`` respectively.
            """
            df = self.df_txt
            codes = [self.get_code(td) for td in df['证券代码']]
            short_names = [self.get_code(td) for td in df['简称']]
            ahts = [self.get_link(td) for td in df['公告标题']]
            times = [self.get_time(td) for td in df['公告时间']]
            df = pd.DataFrame({'证券代码': codes,
                               '简称': short_names,
                               '公告标题': [aht[2] for aht in ahts],
                               'attachpath': [self.prefix + aht[0] for aht in ahts],
                               'href': [self.prefix_href + aht[1] for aht in ahts],
                               '公告时间': times
                               })
            self.df_data = df
            return df
        
    def cninfo_to_dataframe(filename):
        """Turn a saved cninfo result page into a table of download links.

        Reads ``<filename>.html`` and collects every <a> element. The
        anchors are consumed in groups of three (code, short name, title),
        and the href of the third anchor must contain ``announcementId``
        and ``announcementTime`` query parameters.

        Args:
            filename: Path of the saved page without the '.html' suffix.

        Returns:
            DataFrame with the columns '代码', '简称', '公告标题', '链接'.
        """
        with open(filename+'.html',encoding='utf-8') as f:
            html = f.read()
        soup = BeautifulSoup(html,features="html.parser")
        links = soup.find_all('a')
        p = re.compile(r'.*?&announcementId=(\d+).*?&announcementTime=(\d{4}-\d{2}-\d{2})')
        Text = [link.text for link in links]
        Href = [link.get('href') for link in links]
        Code=[]
        Name=[]
        Title=[]
        href=[]
        for n in range(0,len(Text),3):
            Code.append(Text[n])
            Name.append(Text[n+1])
            Title.append(Text[n+2])
            num=re.findall(p,Href[n+2])
            # The URL literal was split across two lines in the original,
            # which is a syntax error; it is a single string here.
            href.append('http://www.cninfo.com.cn/new/announcement/download?bulletinId='
                        +str(num[0][0])+'&announceTime='+str(num[0][1]))
        df=pd.DataFrame({'代码':Code,
                         '简称':Name,
                         '公告标题':Title,
                         '链接':href,})
        return df
    
    
    def sina_to_dataframe(name, code=None):
        """Turn a saved Sina annual-report list into a table of PDF links.

        Reads ``<name>.html``. Every ``YYYY-MM-DD`` date found in the page
        is paired by position with the <a> elements, so the page is
        expected to hold exactly one date per link, in the same order.

        Args:
            name: Company short name; also the saved page's file name
                without the '.html' suffix.
            code: Stock code written to the '代码' column. When omitted, the
                module-level variable ``code`` is used, as the original
                implementation did implicitly.

        Returns:
            DataFrame with the columns '代码', '简称', '公告标题', '链接',
            '年份', restricted to rows whose '年份' is 2013 or later.
        """
        if code is None:
            # Backward compatibility with callers that set a global `code`.
            code = globals()['code']
        with open(name+'.html',encoding='utf-8') as f:
            html = f.read()
        p_time=re.compile(r'(\d{4})(-\d{2})(-\d{2})')
        times=p_time.findall(html)
        y=[int(t[0]) for t in times]
        # The month path segment carries no leading zero ('2021-04' ->
        # '2021-4'). Converting through int keeps '10' intact, whereas the
        # original replace('0','') turned October into '-1'.
        m=['%s-%d' % (t[0], int(t[1][1:])) for t in times]
        d=[t[0]+t[1]+t[2] for t in times]
        soup = BeautifulSoup(html,features="html.parser")
        links = soup.find_all('a')
        Title=[link.text.replace('*','') for link in links]
        href=[link.get('href') for link in links]
        Code=[]
        Name=[]
        Href=[]
        Year=[]
        p_id=re.compile(r'&id=(\d+)')
        for n in range(len(Title)):
            Code.append(code)
            Name.append(name)
            matchedID=p_id.search(href[n]).group(1)
            Href.append('http://file.finance.sina.com.cn/211.154.219.97:9494/MRGG/CNSESH_STOCK/%s/%s/%s/%s.PDF'
                        %(str(y[n]),m[n],d[n],matchedID))
            # Use the year of this very entry. The original read y[n-1],
            # which wraps around to the last date for the first link.
            # NOTE(review): this is the announcement year -- confirm that is
            # what the 2013 cut-off below is meant to filter on.
            Year.append(y[n])
        df=pd.DataFrame({'代码':Code,
                         '简称':Name,
                         '公告标题':Title,
                         '链接':Href,
                         '年份':Year})
        df=df[df['年份']>=2013]
        return df
    
    '''
    过滤年报并下载文件
    '''
    def Readhtml(filename):
        """Return the full UTF-8 text of ``<filename>.html``."""
        path = filename + '.html'
        with open(path, encoding='utf-8') as handle:
            return handle.read()
    
    def tidy(df):
        """Drop announcements that are summaries, cancelled or English versions.

        Args:
            df: DataFrame whose third column (position 2) holds the
                announcement title.

        Returns:
            A new DataFrame without the rows whose title contains '摘要',
            '取消' or '英文', re-indexed from 0. ``df`` itself is unchanged.
        """
        unwanted = re.compile("摘要|取消|英文")
        # .iloc picks the title column by position; plain row[2] on a
        # label-indexed Series is deprecated positional fallback in pandas.
        drop_labels = [label for label, title in df.iloc[:, 2].items()
                       if unwanted.search(title) is not None]
        return df.drop(drop_labels).reset_index(drop=True)
    
    
    def Loadpdf(df):
        """Download every report listed in ``df`` into the current directory.

        Args:
            df: DataFrame whose third column (position 2) is the
                announcement title and whose fourth column (position 3) is
                the download URL. Each file is saved as ``<title>.pdf``.
                Rows sharing a title are downloaded once, using the URL of
                the last such row.

        Raises:
            requests.RequestException: On a timeout, a connection failure
                or a non-success HTTP status.
        """
        # Columns are addressed by position because the two callers use
        # different column names for the link.
        targets = dict(zip(df.iloc[:, 2], df.iloc[:, 3]))
        for title, url in targets.items():
            # A timeout keeps one stalled server from hanging the whole run.
            response = requests.get(url, timeout=60)
            # Fail loudly instead of saving an error page under a .pdf name.
            response.raise_for_status()
            with open(title+".pdf", "wb") as out:
                out.write(response.content)
    

B.操作代码

    
    # Step 1: from the student's name, pick the companies of the assigned
    # industry that are listed in Shenzhen and in Shanghai.
    with pdfplumber.open('行业分类表.pdf') as pdf:
        table = pdf.pages[0].extract_table()
    # Fill empty cells of column 1 from the row above (presumably merged
    # cells in the PDF table -- confirm). Starting at 1 keeps row 0 from
    # wrapping around to the last row.
    for i in range(1, len(table)):
        if table[i][1] is None:
            table[i][1] = table[i-1][1]
    asign = pd.read_csv('行业安排表.csv',converters={'行业':str})[['行业','完成人']]
    Names = InputStu()
    MatchedI = Match(Names,asign)
    sz = Get_sz(table)
    sh = Get_sh(table)
    df_sz = Getcompany(MatchedI,sz)
    df_sh = Getcompany(MatchedI,sh)
    Company = df_sz[['上市公司代码','上市公司简称']]
    Company2 = df_sh[['上市公司代码','上市公司简称']]
    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    My_Company = pd.concat([Company, Company2])
    My_Company.to_csv('company.csv',encoding='utf-8-sig')

    # Step 2: crawl the report lists of the selected companies.
    print('\n(爬取网页中......)')
    browser = webdriver.Edge()  # pick the driver that matches your own browser
    try:
        browser.get('https://www.szse.cn/disclosure/listed/fixed/index.html')
        End = time.strftime('%Y-%m-%d', time.localtime())
        InputTime('2012-01-01',End)

        SelectReport(4)  # choose "annual report"
        Clickonblank()

        # Shenzhen-listed companies: save the SZSE result table per company.
        for index,row in Company.iterrows():
            code = row.iloc[0]
            name = row.iloc[1].replace('*','')
            SearchCompany(code)
            time.sleep(0.5)  # give the page time to load the results
            html = browser.find_element(By.ID, 'disclosure-table')
            innerHTML = html.get_attribute('innerHTML')
            Save(name,innerHTML)
            Clearicon()

        # Shanghai-listed companies: save the Sina Finance report list.
        for index,row in Company2.iterrows():
            code = row.iloc[0]
            name = row.iloc[1].replace('*','')
            browser.get('https://vip.stock.finance.sina.com.cn/corp/go.php/vCB_Bulletin/stockid/%s/page_type/ndbg.phtml'%code)
            html = browser.find_element(By.CLASS_NAME, 'datelist')
            innerHTML = html.get_attribute('innerHTML')
            Save(name,innerHTML)
    finally:
        # Always release the browser, even if a page lookup fails.
        browser.quit()

    # Step 3: parse the saved HTML into tables, store them as CSV and
    # download the report files.
    print('\n【开始保存年报】')
    print('正在下载深交所上市公司年报')
    for i, (index,row) in enumerate(Company.iterrows(), start=1):
        name = row.iloc[1].replace('*','')
        html = Readhtml(name)
        dt = DisclosureTable(html)
        df = dt.get_data()
        df1 = tidy(df)
        df1.to_csv(name+'.csv',encoding='utf-8-sig')
        os.makedirs(name,exist_ok=True)  # sub-folder for the downloaded files
        os.chdir(name)
        try:
            Loadpdf(df1)
        finally:
            # Return to the parent folder so the next iteration can find
            # its HTML file, even when a download fails.
            os.chdir('../')
        print(name+'年报已保存完毕。共',len(Company),'所公司,当前第',i,'所。')

    print('正在下载上交所上市公司年报')
    for j, (index,row) in enumerate(Company2.iterrows(), start=1):
        # sina_to_dataframe reads the module-level `code`; refresh it for
        # this company instead of reusing the last value left by step 2.
        code = row.iloc[0]
        name = row.iloc[1].replace('*','')
        df = sina_to_dataframe(name)
        df1 = tidy(df)
        df1.to_csv(name+'.csv',encoding='utf-8-sig')
        os.makedirs(name,exist_ok=True)  # sub-folder for the downloaded files
        os.chdir(name)
        try:
            Loadpdf(df1)
        finally:
            os.chdir('../')
        print(name+'年报已保存完毕。共',len(Company2),'所公司,当前第',j,'所。')
    
    


结果如下:

爬取下来的源码(html格式),解析成dataframe的表格(csv格式),自动下载的年报(pdf格式)

各公司查询近十年年报结果源码

结果截图

网页爬取中

结果截图

解析后得到的表格

结果截图
结果截图

通过访问表格"attachpath"栏自动下载的年报

结果截图
结果截图

提取“营业收入(元)”、“基本每股收益(元 ╱ 股)”、“归属于上市公司股东的净利润(元)”

“股票简称”、“股票代码”、“办公地址”、“公司网址”

    
    
    import pandas as pd
    import fitz
    import re
    
    # Extract revenue, basic EPS and net profit from each downloaded annual
    # report and store one CSV per company, followed by the company's short
    # name, code, office address and website.

    # Patterns are compiled once, outside the loops.
    # NOTE(review): the patterns in the file as found used ASCII brackets
    # where an optional literal bracket was meant, which made them invalid
    # (`(?\w?)`) or shifted the capture group. Both ASCII and full-width
    # brackets/colons/slashes are accepted here -- confirm against the text
    # PyMuPDF actually extracts from the reports.
    p_year = re.compile(r'.*?(\d{4}) .*?年度报告.*?')
    p_rev = re.compile(r'(?<=\n)营业总?收入[（(]?\w?[）)]?\s?\n?([\d+,.]*)\s\n?')
    p_eps = re.compile(r'(?<=\n)基本每股收益[（(]元[/／╱]?\n?股[）)]\s?\n?([-\d+,.]*)\s?\n?')
    p_np = re.compile(r'(?<=\n)归属于上市公司股东的净利润[（(]?\w?[）)]?\s?\n?([-\d+,.]*)\s?\n?')
    p_site = re.compile(r'(?<=\n)\w*办公地址[:：]?\s?\n?(.*?)\s?(?=\n)', re.DOTALL)
    p_web = re.compile(r'(?<=\n)公司\w*网址[:：]?\s?\n?([a-zA-Z./:]*)\s?(?=\n)', re.DOTALL)

    Company=pd.read_csv('company.csv').iloc[:,1:]
    company=Company.iloc[:,1].tolist()
    for t, com in enumerate(company, start=1):
        com=com.replace('*','')
        df = pd.read_csv(com+'.csv',converters={'证券代码':str})
        # Reverse the row order so the loop below visits rows bottom-up.
        df = df.sort_index(ascending=False)
        final = pd.DataFrame(index=range(2011,2022),
                             columns=['营业收入(元)','基本每股收益(元/股)','归属于上市公司股东的净利润(元)'])
        final.index.name='年份'
        code = str(df.iloc[0,1])
        name = df.iloc[-1,2].replace(' ','')

        text=''
        for i in range(len(df)):  # one annual report per row
            title=df.iloc[i,3]
            with fitz.open('./%s/%s.pdf'%(com,title)) as doc:
                # Only the first 15 pages are scanned; shorter documents
                # are read in full instead of raising IndexError.
                text=''.join(doc[j].get_text() for j in range(min(15, len(doc))))
            year = int(p_year.findall(text)[0])
            revenue=float(p_rev.search(text).group(1).replace(',',''))

            if year>2012 and (year-1) in final.index:
                pre_rev=final.loc[year-1,'营业收入(元)']
                # A missing previous year yields NaN, which never triggers
                # the warning; a zero revenue is skipped to avoid dividing
                # by zero.
                if revenue and pre_rev/revenue>2:
                    print('警告:%s%s营业收入下跌超过百分之50,可能出现问题,请手动查看'%(com,title))
            eps=p_eps.search(text).group(1)
            # Thousands separators are removed so the value can later be
            # parsed as a float.
            net_profit=p_np.search(text).group(1).replace(',','')
            final.loc[year,'营业收入(元)']=revenue
            final.loc[year,'基本每股收益(元/股)']=eps
            # The original stored eps here; the net-profit match belongs in
            # this column.
            final.loc[year,'归属于上市公司股东的净利润(元)']=net_profit

        final.to_csv('【%s】.csv' %com,encoding='utf-8-sig')  # per-company data file

        # Address and website are taken from the last report processed, so
        # a single match after the loop is enough.
        site_match=p_site.search(text)
        web_match=p_web.search(text)
        site=site_match.group(1) if site_match else ''
        web=web_match.group(1) if web_match else ''

        with open('【%s】.csv'%com,'a',encoding='utf-8-sig') as f:
            content='股票简称,%s\n股票代码,%s\n办公地址,%s\n公司网址,%s'%(name,code,site,web)
            f.write(content)
        print(name+'数据已保存完毕'+'(',t,'/',len(company),')')
    
    

结果:存到本地的数据文件(csv格式)


结果如下:

结果截图

结果截图
结果截图

利用获取的十个公司数据,绘制“营业收入(元)”、“基本每股收益(元 ╱ 股)”、 “归属于上市公司股东的净利润(元)”随时间变化趋势图

按每一年度,对该行业内公司“营业收入(元)”、“基本每股收益(元 ╱ 股)”、 “归属于上市公司股东的净利润(元)”绘制对比图


    
    import pandas as pd
    import matplotlib.pyplot as plt
    
    # Load the per-company result files and draw, for ten companies, the
    # trend of revenue, basic EPS and net profit plus per-year bar charts.
    Company=pd.read_csv('company.csv',).iloc[:,1:]
    company=Company.iloc[:,1].tolist()
    # One DataFrame per company, indexed by the '年份' column.
    dflist=[pd.read_csv('【'+name.replace('*','')+'】.csv').set_index('年份')
            for name in company]
    comps = len(dflist)


    def compare_rev(data):
        """Return each company's revenue summed over its first 11 rows.

        Args:
            data: List of per-company DataFrames indexed by '年份'. The
                short name is read from row '股票简称', column '营业收入(元)'.

        Returns:
            DataFrame indexed by short name with the single column
            '近十年总营业收入(元)'.
        """
        df=pd.DataFrame(columns=['近十年总营业收入(元)'])
        # Iterate the argument itself rather than the global list.
        for frame in data:
            short_name=frame.loc['股票简称','营业收入(元)']
            df.loc[short_name,'近十年总营业收入(元)']=frame.iloc[:11,0].astype(float).sum()
        return df


    rank=compare_rev(dflist).sort_values('近十年总营业收入(元)',ascending=False).head(10)
    names=['泸州老窖', '古井贡酒', '燕京啤酒', '酒鬼酒', '承德露露', '五粮液',
           '顺鑫农业', '张裕A','兰州黄河', '*ST黄台']
    indexes=[company.index(idx) for idx in names]


    def _metric_table(column, scale=1.0):
        """Build a year x company table from one data column of every file.

        The first 11 rows of column ``column`` are converted to float and
        divided by ``scale``; the index is converted to int years.
        """
        parts=[]
        for i in indexes:
            series=dflist[i].iloc[:11,column].astype(float)/scale
            series.index=series.index.astype(int)
            parts.append(series)
        table=pd.concat(parts,axis=1)
        # Label the columns in the same order the data was collected. The
        # original used rank.index, which is only right if the revenue
        # ranking happens to match the order of `names`.
        table.columns=names
        return table


    # Revenue and net profit are shown in units of 1e8 yuan (亿元). The
    # original appended the net-profit frames to the wrong list and then
    # read an undefined `datalist2`.
    hori_rev=_metric_table(0, 100000000)
    hori_eps=_metric_table(1)
    hori_np=_metric_table(2, 100000000)

    # (color, marker) per company, in the order of `names`.
    SERIES_STYLES=[('#9BCD9B','o'), ('#1E90FF','^'), ('#2E8B57','*'),
                   ('#FF8C00','x'), ('#4682B4','D'), ('#FF6A6A','+'),
                   ('#6495ED','v'), ('#FFB90F','1'), ('#8B3A3A','1'),
                   ('#00CED1','1')]


    def _plot_trend(table, title, ylabel, figsize, xticks, xlabel_loc=None):
        """Draw one line per company of ``table`` against the year index."""
        plt.rcParams['font.sans-serif']=['SimHei']
        plt.rcParams['axes.unicode_minus']=False  # render minus signs with SimHei
        plt.figure(figsize=figsize)
        # xlim takes (left, right) only; the original passed a stray third
        # positional argument.
        plt.xlim(2011,2023)
        plt.xticks(xticks,fontsize=18)
        plt.yticks(fontsize=18)
        for pos, (label, (color, marker)) in enumerate(zip(names, SERIES_STYLES)):
            plt.plot(table.index,table.iloc[:,pos],color=color,marker=marker,
                     markersize=7,linestyle='-',label=label,linewidth=1,alpha=0.8)
        plt.legend(loc="upper left",prop={'family':'simsun', 'size': 20},framealpha=0.8)
        plt.grid(True)
        plt.title(title,fontsize=25)
        plt.ylabel(ylabel,fontsize=22)
        plt.xlabel("年份",fontsize=22,loc=xlabel_loc)
        plt.show()


    # NOTE(review): the year ranges in the titles below differ from one
    # another and from the 2011-2021 data index -- confirm the intended range.
    _plot_trend(hori_rev,"营业收入随时间变化趋势图(2012-2023)","营业收入(亿元)",
                (16,30),range(2011,2022),xlabel_loc='left')
    _plot_trend(hori_eps,"基本每股收益随时间变化趋势图(2013-2022)","基本每股收益(元/股)",
                (18,24),range(2012,2023))
    _plot_trend(hori_np,"归属于上市公司股东的净利润趋势图(2013-2022)",
                "归属于上市公司股东的净利润(亿元)",(16,30),range(2011,2022))

    # Per-year comparison across companies (first ten years of each table).
    hori_revup=hori_rev.head(10)
    hori_epsup=hori_eps.head(10)
    hori_npup=hori_np.head(10)
    plt.rcParams['font.sans-serif']=['SimHei']
    plt.rcParams['axes.unicode_minus']=False
    ax1=hori_revup.plot(kind='bar',figsize=(16,8),fontsize=18,alpha=0.7,grid=True)
    ax1.legend(loc='best',prop={'family':'simsun', 'size': 14},framealpha=0.5)
    ax1.set_xlabel('年份',loc='left',fontsize=18)
    # The values were divided by 1e8 above, so the unit is 亿元, not 十亿元.
    ax1.set_ylabel('营业收入(亿元)',fontsize=18)
    ax1.set_title('行业内横向对比营业收入(2013-2022)',fontsize=20)
    ax1.figure.savefig('1')

    ax2=hori_epsup.plot(kind='bar',figsize=(18,10),fontsize=18,grid=True,alpha=0.7)
    ax2.legend(loc='best',prop={'family':'simsun', 'size': 14},framealpha=0.7)
    ax2.set_xlabel('年份',loc='right',fontsize=18)
    ax2.set_ylabel('基本每股收益(元/股)',fontsize=18)
    ax2.set_title('行业内横向对比基本每股收益(2011-2014)',fontsize=20)
    ax2.figure.savefig('2')

    # The original plotted the EPS table again under net-profit labels.
    ax3=hori_npup.plot(kind='bar',figsize=(18,10),fontsize=18,grid=True,alpha=0.7)
    ax3.legend(loc='best',prop={'family':'simsun', 'size': 14},framealpha=0.7)
    ax3.set_xlabel('年份',loc='right',fontsize=18)
    ax3.set_ylabel('归属于上市公司股东的净利润(亿元)',fontsize=18)
    ax3.set_title('行业内横向对比归属于上市公司股东的净利润(2012-2023)',fontsize=20)
    ax3.figure.savefig('3')
    
    

绘图结果

酒、饮料和精制茶制造业的十家公司的“营业收入(元)”随时间变化趋势图📈

结果截图

酒、饮料和精制茶制造业十家公司的“基本每股收益(元 ╱ 股)”随时间变化趋势图📉

结果截图

酒、饮料和精制茶制造业十家公司的“归属于上市公司股东的净利润(元)”随时间变化趋势图📉

结果截图

按每一年度,酒、饮料和精制茶制造业上市公司“营业收入(元)”对比图📊

结果截图

按每一年度,酒、饮料和精制茶制造业上市公司“基本每股收益(元 ╱ 股)”对比图📊

结果截图

按每一年度,酒、饮料和精制茶制造业上市公司“归属于上市公司股东的净利润(元)”对比图📊

结果截图

Ⅱ.实验结果分析与理解📝


Ⅲ.附录

1.实验心得💡

第一次完全用 Python 完成一系列任务,包括爬虫、数据整理、数据提取和绘图等内容。从中加深了对爬虫、正则表达式和文件读写的理解,并学会了相应技能,更亲身体会到了自动化的强大之处。
在完成作业时,对于方法问题都能通过学习和搜集信息解决,提高了自学能力。对于编程问题,要仔细和耐心,有时只是简单的数据类型错误,或者表达式和函数运用不规范。同时,在解决实际问题时,不能限制住自己的思维,要发散思维,可能会有更简单的解决方式。

2.实验数据📁

实验数据