张雨涵的实验报告

姓名：张雨涵

学号：0194794

代码1 (从行业分类中获取所有公司代码和简称)


#导入模块
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import fitz
import re
import os
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import time

# Collect the stock codes and short names of the target industry from the classification PDF.
doc = fitz.open(r"C:\Users\Administrator\Desktop\行业分类.pdf")  # industry classification PDF
try:
    page75 = doc[75]  # page where industry 48 (civil engineering construction) starts
    text1 = page75.get_text()
    page76 = doc[76]
    text2 = page76.get_text()
finally:
    # Only the extracted text is used from here on, so release the file handle.
    doc.close()

# A six-digit code on its own line, followed by a name line that may start with '*'.
p1 = re.compile(r'(?<=\n)(\d{6})\n(\*?\w+)*(?=\n)')
txt_1 = p1.findall(text1)
txt_2 = p1.findall(text2)
txt1 = txt_1[13:51]  # page 75 also lists industry-47 companies; drop them
txt2 = txt_2[0:49]
txt = txt1 + txt2

# Flatten the matched tuples into one string and pull every digit run out of it.
s = "".join(str(i) for i in txt)
p2 = re.compile(r'\d+')
code = p2.findall(s)

code_sz = code[0:31]  # first part: SZSE-listed companies, reports come from the SZSE site
code_sh = code[31:71]  # second part: SSE-listed companies, reports come from the SSE site

结果1

代码2(获取深交所年报并下载)


    
#获取深交所年报
    
for stock_code in code_sz:
    # A fresh browser is opened for every stock code; the finally clause closes it even
    # when a lookup fails, so Chrome processes do not accumulate.
    browser = webdriver.Chrome()
    try:
        browser.get('http://www.szse.cn/disclosure/listed/fixed/index.html')
        time.sleep(2)
        # Choose the "年度报告" entry of the announcement-type selector.
        browser.find_element(By.CSS_SELECTOR, "#select_gonggao .c-selectex-btn-text").click()
        browser.find_element(By.LINK_TEXT, "年度报告").click()
        time.sleep(2)
        # Fill in the two date inputs and run the query.
        browser.find_element(By.CSS_SELECTOR, ".input-left").click()
        browser.find_element(By.CSS_SELECTOR, ".input-left").send_keys("2013-01-01")
        browser.find_element(By.CSS_SELECTOR, ".input-right").click()
        browser.find_element(By.CSS_SELECTOR, ".input-right").send_keys("2022-06-01")
        browser.find_element(By.ID, "query-btn").click()
        time.sleep(2)
        # Type the stock code into the code box and submit it.
        element = browser.find_element(By.ID, 'input_code')
        element.send_keys(stock_code)
        element.send_keys(Keys.ENTER)
        time.sleep(2)
        # Append the markup of the result table to one shared HTML file.
        element = browser.find_element(By.ID, "disclosure-table")
        time.sleep(2)
        innerHTML = element.get_attribute("innerHTML")
        with open("深交所年报.html", 'a', encoding='utf-8') as f:
            f.write(innerHTML)
        browser.find_element(By.CSS_SELECTOR, ".btn-clearall").click()
    finally:
        browser.quit()

#解析深交所定期报告页搜索表格
    def to_pretty(fhtml):
        """Prettify an HTML file and save the result next to it.

        Args:
            fhtml: path of a UTF-8 HTML file; its last five characters
                (expected to be ".html") are replaced by "-prettified.html"
                to build the output path.

        Returns:
            The prettified HTML text that was written to the output file.
        """
        with open(fhtml, encoding='utf-8') as f:
            html = f.read()

        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        soup = BeautifulSoup(html, 'html.parser')
        html_prettified = soup.prettify()

        with open(fhtml[0:-5] + '-prettified.html', 'w', encoding='utf-8') as f:
            f.write(html_prettified)
        return html_prettified

# Prettify the saved SZSE table markup; the returned text is parsed with regexes below.
html = to_pretty('深交所年报.html')


def txt_to_df(html):
    """Split HTML table text into a DataFrame of raw cell markup.

    Args:
        html: text containing one or more ``<tr>`` rows made of ``<td>`` cells.

    Returns:
        DataFrame with the columns '证券代码', '简称', '公告标题' and '公告时间',
        holding the inner markup of the first four cells of every data row.
    """
    # NOTE(review): the row/cell delimiters assume ordinary <tr>/<td> markup;
    # confirm against the saved HTML file.
    p_tr = re.compile(r'<tr.*?>(.*?)</tr>', re.DOTALL)
    p_td = re.compile(r'<td.*?>(.*?)</td>', re.DOTALL)

    # The first row is skipped, as before.
    rows = [p_td.findall(tr) for tr in p_tr.findall(html)[1:]]
    # Rows without four <td> cells (e.g. header rows made of <th>) carry no record.
    tds = [cells for cells in rows if len(cells) >= 4]

    return pd.DataFrame({'证券代码': [td[0] for td in tds],
                         '简称': [td[1] for td in tds],
                         '公告标题': [td[2] for td in tds],
                         '公告时间': [td[3] for td in tds]})

# One row of raw cell markup per table row of the prettified SZSE HTML.
df_txt = txt_to_df(html)

# NOTE(review): assumes the code/name cells wrap their text in <a> and the time
# cell wraps it in <span>; confirm against the saved HTML file.
p_a = re.compile(r'<a.*?>(.*?)</a>', re.DOTALL)
p_span = re.compile(r'<span.*?>(.*?)</span>', re.DOTALL)


def get_code(txt):
    """Return the stripped text inside the first <a> element of ``txt``."""
    return p_a.search(txt).group(1).strip()


def get_time(txt):
    """Return the stripped text inside the first <span> element of ``txt``."""
    return p_span.search(txt).group(1).strip()

def get_link(txt):
    """Extract the download path, page link and title from a title cell.

    Args:
        txt: markup of one announcement-title cell.

    Returns:
        ``[attachpath, href, title]`` with surrounding whitespace stripped.

    Raises:
        ValueError: if ``txt`` holds no anchor with the expected attributes.
    """
    # Three groups are read below, so the pattern has to capture all three.
    # NOTE(review): assumes <a ... attachpath="..." ... href="..."> wrapping a
    # <span> with the title; confirm against the saved HTML file.
    p_txt = r'<a.*?attachpath="(.*?)".*?href="(.*?)".*?<span.*?>(.*?)</span>'
    p = re.compile(p_txt, re.DOTALL)
    matchObj = p.search(txt)
    if matchObj is None:
        raise ValueError('no announcement link found in cell: %r' % txt)
    attachpath = matchObj.group(1).strip()
    href = matchObj.group(2).strip()
    title = matchObj.group(3).strip()
    return [attachpath, href, title]

def get_data(df_txt):
    """Build the cleaned SZSE announcement table from raw cell markup.

    Args:
        df_txt: DataFrame with the raw-markup columns '证券代码', '简称',
            '公告标题' and '公告时间'.

    Returns:
        DataFrame with the extracted code, short name, title, the download URL
        ('attachpath'), the page URL ('href') and the announcement time.
    """
    download_root = 'https://disc.szse.cn/download'
    page_root = 'https://www.szse.cn/'

    code_col = list(map(get_code, df_txt['证券代码']))
    name_col = list(map(get_code, df_txt['简称']))
    link_col = list(map(get_link, df_txt['公告标题']))
    time_col = list(map(get_time, df_txt['公告时间']))

    # Key order fixes the column order of the result.
    table = {}
    table['证券代码'] = code_col
    table['简称'] = name_col
    table['公告标题'] = [link[2] for link in link_col]
    table['attachpath'] = [download_root + link[0] for link in link_col]
    table['href'] = [page_root + link[1] for link in link_col]
    table['公告时间'] = time_col
    return pd.DataFrame(table)

# Cleaned SZSE announcement table (code, short name, title, URLs, time).
df_sz = get_data(df_txt)

#过滤年报摘要与已取消的年报
def tidy(df_sz):
    """Drop announcements that are summaries or cancelled reports.

    Args:
        df_sz: DataFrame with a '公告标题' column of announcement titles.

    Returns:
        A new DataFrame without the rows whose title contains "摘要" or "取消",
        re-indexed from 0. The argument itself is left untouched.
    """
    # Read from the argument rather than from a global, so the function works
    # on whatever frame it is handed.
    unwanted = [index for index, title in df_sz['公告标题'].items()
                if re.search("摘要|取消", title) is not None]
    return df_sz.drop(unwanted).reset_index(drop=True)

df_sz = tidy(df_sz)


# Download every report that survived the filter.
import requests
# Iterate over the filtered table itself instead of a fixed row count.
for stock_code, title, url in zip(df_sz['证券代码'], df_sz['公告标题'], df_sz['attachpath']):
    r = requests.get(url, allow_redirects=True)
    try:
        time.sleep(2)
        with open(stock_code + title + '.pdf', 'wb') as f:
            f.write(r.content)
    finally:
        r.close()

结果2

#深交所能获取的文件：

#过滤后得到的数据：

代码3(获取上交所年报并下载)


        
    #获取上交所年报
        
    for stock_code in code_sh:
        # A fresh browser is opened for every stock code; the finally clause closes it
        # even when a lookup fails, so Chrome processes do not accumulate.
        browser = webdriver.Chrome()
        try:
            browser.get("http://www.sse.com.cn/disclosure/listedinfo/regular/")
            # Type the stock code into the code box.
            browser.find_element(By.ID, "inputCode").click()
            browser.find_element(By.ID, "inputCode").send_keys(stock_code)
            time.sleep(2)
            # Open the fourth filter drop-down and choose "年报".
            browser.find_element(By.CSS_SELECTOR, ".sse_outerItem:nth-child(4) .filter-option-inner-inner").click()
            time.sleep(2)
            browser.find_element(By.LINK_TEXT, "年报").click()
            time.sleep(2)
            # Append the markup of the result table to one shared HTML file.
            element = browser.find_element(By.CLASS_NAME, 'table-responsive')
            innerHTML = element.get_attribute("innerHTML")
            time.sleep(2)
            with open("上交所年报.html", 'a', encoding='utf-8') as f:
                f.write(innerHTML)
        finally:
            browser.quit()
    
    #解析上交所定期报告页搜索表格
        
def to_pretty(fhtml):
    """Prettify an HTML file and save the result next to it.

    Args:
        fhtml: path of a UTF-8 HTML file; its last five characters
            (expected to be ".html") are replaced by "-prettified.html"
            to build the output path.

    Returns:
        The prettified HTML text that was written to the output file.
    """
    with open(fhtml, encoding='utf-8') as f:
        html = f.read()

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(html, 'html.parser')
    html_prettified = soup.prettify()

    with open(fhtml[0:-5] + '-prettified.html', 'w', encoding='utf-8') as f:
        f.write(html_prettified)
    return html_prettified

# Prettify the saved SSE table markup; the returned text is parsed with regexes below.
html = to_pretty('上交所年报.html')

def txt_to_df(html):
    """Split HTML table text into a DataFrame of raw cell markup.

    Args:
        html: text containing one or more ``<tr>`` rows made of ``<td>`` cells.

    Returns:
        DataFrame with the columns '证券代码', '简称', '公告标题' and '公告时间',
        holding the inner markup of the first four cells of every data row.
    """
    # NOTE(review): the row/cell delimiters assume ordinary <tr>/<td> markup;
    # confirm against the saved HTML file.
    p_tr = re.compile(r'<tr.*?>(.*?)</tr>', re.DOTALL)
    p_td = re.compile(r'<td.*?>(.*?)</td>', re.DOTALL)

    # The first row is skipped, as before.
    rows = [p_td.findall(tr) for tr in p_tr.findall(html)[1:]]
    # Rows without four <td> cells (e.g. header rows made of <th>) carry no record.
    tds = [cells for cells in rows if len(cells) >= 4]

    return pd.DataFrame({'证券代码': [td[0] for td in tds],
                         '简称': [td[1] for td in tds],
                         '公告标题': [td[2] for td in tds],
                         '公告时间': [td[3] for td in tds]})

# One row of raw cell markup per table row of the prettified SSE HTML.
df_txt = txt_to_df(html)

# NOTE(review): assumes link text sits in <a> and code/name text in <span>;
# confirm against the saved HTML file.
p_a = re.compile(r'<a.*?>(.*?)</a>', re.DOTALL)
p_span = re.compile(r'<span.*?>(.*?)</span>', re.DOTALL)
# Text between two runs of whitespace, as left around cell content by prettify().
p_space = re.compile(r'\s+(.*?)\s+', re.DOTALL)


def get_a(txt):
    """Return the stripped text inside the first <a> element of ``txt``."""
    return p_a.search(txt).group(1).strip()


def get_span(txt):
    """Return the stripped text inside the first <span> element of ``txt``."""
    return p_span.search(txt).group(1).strip()


def get_space(txt):
    """Return the first whitespace-delimited token of ``txt``."""
    return p_space.search(txt).group(1).strip()

def get_link2(txt):
    """Extract the link target and title from a title cell.

    Args:
        txt: markup of one announcement-title cell.

    Returns:
        ``[href, title]`` with surrounding whitespace stripped.

    Raises:
        ValueError: if ``txt`` holds no anchor with an ``href`` attribute.
    """
    # Two groups are read below, so the pattern has to capture both.
    # NOTE(review): assumes the title is the text of an <a href="..."> element;
    # confirm against the saved HTML file.
    p_txt = r'<a.*?href="(.*?)".*?>(.*?)</a>'
    p = re.compile(p_txt, re.DOTALL)
    matchObj = p.search(txt)
    if matchObj is None:
        raise ValueError('no announcement link found in cell: %r' % txt)
    href = matchObj.group(1).strip()
    title = matchObj.group(2).strip()
    return [href, title]

def get_data2(df_txt):
    """Build the cleaned SSE announcement table from raw cell markup.

    Args:
        df_txt: DataFrame with the raw-markup columns '证券代码', '简称',
            '公告标题' and '公告时间'.

    Returns:
        DataFrame with the extracted code, short name, title, the full
        document URL ('href') and the announcement time.
    """
    static_root = 'http://static.sse.com.cn/'

    code_col = list(map(get_span, df_txt['证券代码']))
    name_col = list(map(get_span, df_txt['简称']))
    link_col = list(map(get_link2, df_txt['公告标题']))
    time_col = list(map(get_space, df_txt['公告时间']))

    # Key order fixes the column order of the result.
    table = {}
    table['证券代码'] = code_col
    table['简称'] = name_col
    table['公告标题'] = [link[1] for link in link_col]
    table['href'] = [static_root + link[0] for link in link_col]
    table['公告时间'] = time_col
    return pd.DataFrame(table)

# Cleaned SSE announcement table (code, short name, title, URL, time).
df_sh = get_data2(df_txt)    
    
#过滤年报摘要与已取消的年报
      def tidy(df):
          """Drop announcements that are summaries or cancelled reports.

          Args:
              df: DataFrame with a '公告标题' column of announcement titles.

          Returns:
              A new DataFrame without the rows whose title contains "摘要" or
              "取消", re-indexed from 0. The argument itself is left untouched.
          """
          # Look the title up by column name; positional access on a labelled
          # row is deprecated in current pandas.
          unwanted = [index for index, title in df['公告标题'].items()
                      if re.search("摘要|取消", title) is not None]
          return df.drop(unwanted).reset_index(drop=True)

      # Filter the SSE table in place of the unfiltered one.
      df_sh = tidy(df_sh)

      # Download every report that survived the filter.
      import requests
      # Iterate over the filtered table itself instead of a fixed row count.
      for stock_code, published, href in zip(df_sh['证券代码'], df_sh['公告时间'], df_sh['href']):
          r = requests.get(href, allow_redirects=True)
          try:
              time.sleep(2)
              with open(stock_code + published + '.pdf', 'wb') as f:
                  f.write(r.content)
          finally:
              r.close()

结果3

#上交所能获取的文件：

#数据过滤后得到的数据：

#下载好的年报：

代码4(提取“股票简称”、“股票代码”、“办公地址”、“公司网址”)



#提取股票代码和股票简称

  #提取深交所股票代码和股票简称
  # SZSE: one (code, short name) row per distinct short name.
  dmjc_sz = df_sz[['证券代码', '简称']].drop_duplicates(subset=["简称"])

  # SSE: same reduction.
  dmjc_sh = df_sh[['证券代码', '简称']].drop_duplicates(subset=["简称"])

  # Merge both exchanges and save as CSV.
  # reset_index returns a new frame, so its result has to be kept to take effect.
  dmjc = pd.concat([dmjc_sz, dmjc_sh]).reset_index(drop=True)
  dmjc.to_csv(r'C:\Users\Administrator\Desktop\\dmjc.csv')


  #提取办公地址，公司网址
  index_site = dmjc['证券代码'].drop_duplicates()
  index_web = dmjc['证券代码'].drop_duplicates()

  # Office address and website are taken from each company's 2021 annual report.
  # NOTE(review): getText is defined in the section-5 code further down the report;
  # it has to be defined before this block runs.
  report_dir = r'C:\Users\Administrator\Desktop\2021NB'
  # Only PDFs are parsed, so the CSV files written below are skipped on a re-run.
  filenianfens = [f for f in os.listdir(report_dir) if f.lower().endswith('.pdf')]

  p_site = re.compile(r'(?<=\n)\w*办公地址：?\s?\n?(.*?)\s?(?=\n)', re.DOTALL)
  p_web = re.compile(r'(?<=\n)公司\w*网\s?址：?\s?\n?(.*?)\s?(?=\n)', re.DOTALL)

  site = []
  web = []
  for pdf in filenianfens:
      text = getText(os.path.join(report_dir, pdf))

      # A report without a match gets an empty cell, which keeps the rows aligned
      # with the reports and can be filled in by hand afterwards.
      m_site = p_site.search(text)
      site.append(m_site.group(0) if m_site else None)

      m_web = p_web.search(text)
      web.append(m_web.group(0) if m_web else None)

  df_site = pd.DataFrame({'办公地址': site})
  df_web = pd.DataFrame({'网址': web})
  # NOTE(review): this requires one report per stock code, listed in the same
  # order as the codes in dmjc - confirm against the folder contents.
  df_site.index = index_site
  df_web.index = index_web
  df_site.to_csv(r'C:\Users\Administrator\Desktop\2021NB\sites.csv')
  df_web.to_csv(r'C:\Users\Administrator\Desktop\2021NB\webs.csv')

结果4

#提取股票代码和股票简称：

#提取办公地址：

#提取公司网址：

代码5(提取“主要会计数据和财务指标”中的“营业收入（元）”、“基本每股收益（元 ╱ 股）”)


      
#定义函数提取“主要会计数据和财务指标”中的“营业收入（元）”
 def getText(pdf):
     """Return the text of every page of a PDF, lightly normalised.

     Every space gets a newline appended and every doubled newline is collapsed
     into one, so that values end up on lines of their own for the
     line-oriented regexes used elsewhere.

     Args:
         pdf: path of the PDF file.
     """
     doc = fitz.open(pdf)
     try:
         text = "".join(page.get_text() for page in doc)
     finally:
         # Close the document even when a page fails to extract.
         doc.close()
     text = text.replace(" ", " \n")
     text = text.replace("\n\n", "\n")
     return text
 def get_content(pdf):
     """Return the accounting-data section of an annual report.

     The section runs from a numbered heading line containing "主要" and "数据"
     up to the first occurrence of "经营活动产生的".

     Args:
         pdf: path of the annual report PDF.

     Raises:
         ValueError: if the report text holds no such section.
     """
     text = getText(pdf)
     p = re.compile(r'(?<=\n)\D、\s*\D*?主要\D*?数据\D*?\s*(?=\n)(.*?)经营活动产生的', re.DOTALL)
     match = p.search(text)
     if match is None:
         raise ValueError('accounting-data section not found in %s' % pdf)
     return match.group(0)
 def parse_data_line(pdf):
     """Return the operating-revenue entry of an annual report as raw text.

     The match starts at a line beginning with "营业", covers its label
     line(s) and the four numeric lines that follow.

     Args:
         pdf: path of the annual report PDF.

     Raises:
         ValueError: if the accounting-data section holds no such entry.
     """
     content = get_content(pdf)
     sp = r"([0-9,.%\- ]*?)\n"  # one numeric line
     ps = sp * 4
     p = re.compile(r"(?<=\n)营业(\D*?\n)+" + ps)
     lines = p.search(content)
     if lines is None:
         raise ValueError('operating-revenue entry not found in %s' % pdf)
     return lines[0]


#定义函数提取“主要会计数据和财务指标”中的“基本每股收益（元 ╱ 股）”
 def get_profit(pdf):
     """Return the financial-indicator section of an annual report.

     The section runs from a numbered heading line containing "主要" and "指标"
     up to the first occurrence of "稀释每股".

     Args:
         pdf: path of the annual report PDF.

     Raises:
         ValueError: if the report text holds no such section.
     """
     text = getText(pdf)
     p = re.compile(r'(?<=\n)\D、\s*\D*?主要\D*?指标\D*?\s*(?=\n)(.*?)稀释每股', re.DOTALL)
     match = p.search(text)
     if match is None:
         raise ValueError('financial-indicator section not found in %s' % pdf)
     return match.group(0)

 def profit_data_line(pdf):
     """Return the basic-EPS entry of an annual report as raw text.

     The match starts at a line beginning with "基本每股收益", covers its label
     line(s) and the four numeric lines that follow.

     Args:
         pdf: path of the annual report PDF.

     Raises:
         ValueError: if the financial-indicator section holds no such entry.
     """
     profit = get_profit(pdf)
     sp = r"([0-9,.%\- ]*?)\n"  # one numeric line
     ps = sp * 4
     p = re.compile(r"(?<=\n)基本每股收益(\D*?\n)+" + ps)
     lines_profit = p.search(profit)
     if lines_profit is None:
         raise ValueError('basic-EPS entry not found in %s' % pdf)
     return lines_profit[0]
      
      
#循环获取营业收入
 # Collect the operating revenue from every report of every company folder.
 path = r'C:\Users\Administrator\Desktop\tmgcjzy'
 os.chdir(path)
 sale_by_code = {}
 for info in sorted(os.listdir(path)):
     company_dir = os.path.join(path, info)
     if not os.path.isdir(company_dir):
         continue  # plain files such as the CSVs saved into this folder
     ste = info[-6:]  # last six characters of the folder name label the column
     sale = []
     for pdf in sorted(os.listdir(company_dir)):
         if not pdf.lower().endswith('.pdf'):
             continue
         # Build the path afresh for every file, so nothing piles up on the
         # folder name from one report to the next.
         sale_gain = parse_data_line(os.path.join(company_dir, pdf))
         # The second line of the matched entry is the value that is kept.
         sale.append(sale_gain.split("\n")[1])
     sale_by_code[ste] = pd.Series(sale, dtype=object)
 # Columns of different length are aligned on the row number; short ones get NaN.
 df_sale = pd.DataFrame(sale_by_code)
 print('循环结束')

 index1 = [f'{year}营业收入' for year in range(2012, 2022)]
 # Transpose: one row per company, one column per year (needs ten values per company).
 df_sale_T = pd.DataFrame(df_sale.values.T, columns=index1, index=df_sale.columns)
 df_sale_T.to_csv(r'C:\Users\Administrator\Desktop\tmgcjzy\sales.csv')

 #循环提取每股收益
 # Collect the basic EPS from every report of every company folder.
 path = r'C:\Users\Administrator\Desktop\tmgcjzy'
 os.chdir(path)
 profit_by_code = {}
 for info in sorted(os.listdir(path)):
     company_dir = os.path.join(path, info)
     if not os.path.isdir(company_dir):
         continue  # plain files such as the CSVs saved into this folder
     ste = info[-6:]  # last six characters of the folder name label the column
     profit = []
     for pdf in sorted(os.listdir(company_dir)):
         if not pdf.lower().endswith('.pdf'):
             continue
         # Build the path afresh for every file, so nothing piles up on the
         # folder name from one report to the next.
         profit_gain = profit_data_line(os.path.join(company_dir, pdf))
         # The second line of the matched entry is the value that is kept.
         profit.append(profit_gain.split("\n")[1])
     profit_by_code[ste] = pd.Series(profit, dtype=object)
 # Columns of different length are aligned on the row number; short ones get NaN.
 df_profit = pd.DataFrame(profit_by_code)
 print('循环结束')

 index2 = [f'{year}每股收益' for year in range(2012, 2022)]
 # Transpose: one row per company, one column per year (needs ten values per company).
 df_profit_T = pd.DataFrame(df_profit.values.T, columns=index2, index=df_profit.columns)
 df_profit_T.to_csv(r'C:\Users\Administrator\Desktop\tmgcjzy\profits.csv')
      
      
#ps:若出现部分公司在循环过程中提取失败可能是正则表达式不匹配所引起的，故而若出现明显的数据错误可手动更改

结果5

#营业收入（元）：

#基本每股收益（元 ╱ 股）：

#例如提取基本每股收益时出现错误数据：

#人工查找后更改为：

代码6(每家上市公司绘制“营业收入（元）”、“基本每股收益（元 ╱ 股）”随时间变化趋势图)

  plt.rcParams['font.sans-serif'] = ['SimHei']  # font used for the Chinese labels
  plt.rcParams['axes.unicode_minus'] = False

  # Load the operating-revenue data.
  # Industry 48 (civil engineering construction) has more than 10 companies, so only the
  # 10 with the highest revenue are plotted. The saved revenue file is sorted by 2021
  # revenue in descending order beforehand, which makes its first 10 rows those companies.
  sales = pd.read_excel("sales.xlsx")
  # Yearly revenue of the first 10 companies; reset_index returns a new frame, so keep it.
  sales_high1 = sales.iloc[0:10, 1:].reset_index(drop=True)
  sales_high2 = sales.iloc[0:10, ]

  list_row = sales_high1.values.tolist()  # one list per company
  columns = list(sales_high1)
  list_columns = [sales_high1[c].values.tolist() for c in columns]  # one list per year

  nianfen = ["2012","2013","2014","2015","2016","2017","2018","2019","2020","2021"]
  code_sales = ["600248","600039","000498","002061","000065","002307","600284","002062","002564","002140"]
# 画出每家公司的2012—2021的营业收入的时间变化趋势图（纵向对比）
    def y_ticks(list_row,code_sales):
        """Draw, save and show a horizontal bar chart of one company's revenue.

        Args:
            list_row: revenue values of one company, one per year label in the
                module-level ``nianfen`` list.
            code_sales: stock code, used in the title and as the PNG file name.
        """
        positions = np.arange(len(list_row))
        # An explicit colour list is cycled over the bars; a packed string such
        # as 'rgby' is not accepted by current matplotlib.
        rects = plt.barh(positions, list_row, color=['r', 'g', 'b', 'y'])
        shown = min(len(list_row), len(nianfen))
        plt.yticks(np.arange(shown), nianfen[:shown])
        plt.title(code_sales+" 2012—2021营业收入")
        plt.xlabel("营业收入（元）")
        plt.ylabel("年份")
        for rect in rects:
            w = rect.get_width()
            plt.text(w, rect.get_y() + rect.get_height() / 2, str(w), size=10, ha='left', va='center')
        plt.savefig(code_sales + ".png", dpi=600)
        plt.show()

    # The driver loop sits outside the function: one chart per company.
    for row, stock_code in zip(list_row, code_sales):
        y_ticks(row, stock_code)
         
         
#根据营业收入最高的10家公司对基本每股收益文件进行处理，将那10家公司放在前十行，如此文件中前10家公司即为营业收入最高的十家公司
         
         
 #导入基本每股收益数据
   profit = pd.read_excel('profits.xlsx')
   profit1 = profit
   # NOTE(review): the accompanying remark says the ten companies are placed in the
   # first ten rows, but the last ten rows are selected here - confirm which is intended.
   # reset_index returns a new frame, so its result is kept.
   profit_new1 = profit1.iloc[-10:, 1:].reset_index(drop=True)
   profit_new2 = profit1.iloc[-10:, ]

   # Row-wise lists: each one holds ten years of basic EPS for one company.
   list_profit = profit_new1.values.tolist()
   columns1 = list(profit_new1)
   # Column-wise lists: each one holds one year for all companies.
   list_columns_profit = [profit_new1[c].values.tolist() for c in columns1]

   nianfen = ["2012","2013","2014","2015","2016","2017","2018","2019","2020","2021"]
   code_profit = ["600248","600039","000498","002061","000065","002307","600284","002062","002564","002140"]
   def y_ticks2(list_profit,code_profit):
       """Draw, save and show a horizontal bar chart of one company's basic EPS.

       Args:
           list_profit: EPS values of one company, one per year label in the
               module-level ``nianfen`` list.
           code_profit: stock code, used in the title and the PNG file name.
       """
       positions = np.arange(len(list_profit))
       # An explicit colour list is cycled over the bars; a packed string such
       # as 'rgby' is not accepted by current matplotlib.
       rects = plt.barh(positions, list_profit, color=['r', 'g', 'b', 'y'])
       shown = min(len(list_profit), len(nianfen))
       plt.yticks(np.arange(shown), nianfen[:shown])
       plt.title(code_profit+" 2012—2021基本每股收益对比")
       plt.xlabel("基本每股收益（元 ╱ 股）")
       plt.ylabel("年份")
       for rect in rects:
           w = rect.get_width()
           plt.text(w, rect.get_y() + rect.get_height() / 2, str(w), size=10, ha='left', va='center')
       plt.savefig(code_profit + "每股收益.png", dpi=600)
       plt.show()

   # One chart per company.
   for row, stock_code in zip(list_profit, code_profit):
       y_ticks2(row, stock_code)

结果6

#每家公司的2012—2021的营业收入的时间变化趋势图（纵向对比）

#每家公司的2012—2021的基本每股收益的时间变化趋势图（纵向对比）

代码7(按每一年度，对该行业内公司“营业收入（元）”、“基本每股收益（元 ╱ 股）”绘制对比图)


    
#利用代码6中导入的数据画出10家公司同一年度营业收入的对比图（横向对比）
    def x_ticks(list_columns,nianfen):
        """Draw, save and show a bar chart comparing companies' revenue in one year.

        Args:
            list_columns: revenue values of one year, one per stock code in the
                module-level ``code_sales`` list.
            nianfen: year label, used in the title and as the PNG file name.
        """
        # An explicit colour list is cycled over the bars; a packed string such
        # as "rgb" is not accepted by current matplotlib.
        rects = plt.bar(range(len(list_columns)), list_columns, color=['r', 'g', 'b'],
                        width=1, tick_label=code_sales)
        plt.title(nianfen+"不同公司之间营业收入对比")
        plt.xlabel("公司代码")
        plt.ylabel("营业收入")
        for rect in rects:
            height = rect.get_height()
            plt.text(rect.get_x() + rect.get_width() / 2, height, str(height), size=10, ha="center", va="bottom")
        plt.savefig(nianfen + ".png", dpi=600)
        plt.show()

    # One chart per year.
    for year_values, year in zip(list_columns, nianfen):
        x_ticks(year_values, year)
    
    

    
#利用代码6中导入的数据画出10家公司同一年度基本每股收益的对比图（横向对比）
  def x_ticks2(list_columns_profit,nianfen):
      """Draw, save and show a bar chart comparing companies' basic EPS in one year.

      Args:
          list_columns_profit: EPS values of one year, one per stock code in
              the module-level ``code_profit`` list.
          nianfen: year label, used in the title and the PNG file name.
      """
      # An explicit colour list is cycled over the bars; a packed string such
      # as "rgb" is not accepted by current matplotlib.
      rects = plt.bar(range(len(list_columns_profit)), list_columns_profit, color=['r', 'g', 'b'],
                      width=1, tick_label=code_profit)
      plt.title(nianfen+"不同公司基本每股收益对比")
      plt.xlabel("公司代码")
      plt.ylabel("基本每股收益（元 ╱ 股）")
      for rect in rects:
          height = rect.get_height()
          plt.text(rect.get_x() + rect.get_width() / 2, height, str(height), size=10, ha="center", va="bottom")
      plt.savefig(nianfen + "每股收益.png", dpi=600)
      plt.show()

  # One chart per year.
  for year_values, year in zip(list_columns_profit, nianfen):
      x_ticks2(year_values, year)

结果7

#10家公司同一年度营业收入的对比图（横向对比）

#10家公司同一年度基本每股收益的对比图（横向对比）

结果解读

解读营业收入和基本每股收益的时间变化趋势图（纵向对比图） :

营业收入纵向对比图：过去十年内，部分土木工程建筑业公司的营业收入逐年递增，尤其是近三年，部分公司的营业收入大幅增长；但同时也有部分土木工程建筑公司的营业收入每年变动方向不一，增减情况没有明显趋势。结合土木工程建筑业的上市公司数量可知：随着我国城市化的推进，土木工程建筑业近年来竞争激烈，部分公司依靠发展和创新土木工程施工技术，在过去几年中实现了营业收入的较大提高；但也有部分公司受疫情影响，2020年和2021年的营业收入明显低于2019年。可见，面对疫情，不同公司的营业收入增幅明显不同，只有不断提高公司自身的能力，抓住城市化进程的时机，方可取得较好的营业收入。

基本每股收益纵向对比图：从基本每股收益的纵向对比图中可以看出，大部分公司在2015-2017年期间的基本每股收益都不太高，这与当时整个行业低迷，营业收入没有明显提高有关，但可以明显地观察到近三年营业收入大幅增长的公司基本每股收益也都大幅增长，公司盈利能力提高，可见即使在疫情下，拥有施工优势的公司依然可以得到较大的发展，重点在于不断提高公司建筑方面的能力，提高公司盈利能力。

解读同一年度下不同公司的营业收入和基本每股收益对比图（横向对比图）:

营业收入横向对比图：从同一年度下不同公司的营业收入对比图中可以看出，建筑行业竞争激烈，公司之间营业收入差别较大。例如600039在过去10年中几乎有半数年份营业收入一骑绝尘，但是受疫情影响，加之600248、002061等公司在过去几年中发展迅速、在过去三年中承包了许多建筑业务，600039的营业收入有所下降，究其根本原因还是公司竞争力下降，没有把握住发展机会。从十张对比图中还可以看出，建筑行业内部差距大、淘汰快，过去几年某公司发展良好，并不意味着将来几年也能发展良好，土木工程建筑业的公司应当注重建筑创新，在保证施工质量的同时提高建筑速度等。

基本每股收益横向对比图：过去十年中，随着城市化的推进，大部分公司的每股收益都为正数，但同时也存在着连续几年每股收益不断下降的公司。例如：公司002061在2013-2015年的每股收益不断下降，甚至在2014年和2015年出现负数；但是查看公司002061自2016年以来的每股收益，可以看到每股收益有大幅上升。结合它营业收入的情况可知，即使在行业和该公司本身连续几年都不景气的情况下，只要抓住机会提高建筑施工质量、吸收建筑人才，在竞争激烈的土木工程建筑业依然可以得到很好的发展。可见竞争激烈会淘汰部分公司，但也会促进部分公司不断提高自身竞争力，提高公司盈利能力，为中国建筑行业添砖加瓦！