import pandas as pd
import openpyxl
import re
# Extract (link, file name) pairs from the =HYPERLINK(...) formulas stored in
# column C of the workbook and save them as a CSV.
xlsx = 'D:/anaconda/新能源行业.xlsx'
df = pd.read_excel(xlsx)  # not used further in this section
# openpyxl is used (rather than the DataFrame above) and its cell values are
# parsed as formula text of the form shown in `sample` below.
exf = openpyxl.load_workbook(xlsx)
sheet = exf.active
C2 = sheet['C2']  # not used further in this section
C = sheet['C']
links = [c.value for c in C]
# Drop the first and last cell of the column (presumably a header row and a
# trailing footer row -- TODO confirm against the workbook), and keep only
# text cells: empty cells come back as None and would break string handling.
links_1 = [value for value in links[1:-1] if isinstance(value, str)]
# Example of one cell's content, kept as documentation of the expected format.
sample = '=HYPERLINK("http://news.windin.com/ns/bulletin.php?code=818C2D61C901&id=124505628&type=1","孚能科技:2020年年度报告(更新后)")'
p = re.compile(r'"(.*?)","(.*?)"')
# Parse each cell on its own so that one malformed cell cannot shift the
# pairing of URL and title for the cells that follow it.
list_of_tuple = [pair for formula in links_1 for pair in p.findall(formula)]
df2 = pd.DataFrame({
    'link': [url for url, _ in list_of_tuple],
    'f_name': [title for _, title in list_of_tuple],
})
# NOTE(review): the output name differs from the source workbook name
# ('新能源行业') -- confirm this is the intended file.
df2.to_csv('通讯行业.csv')
import re
import requests
import pandas as pd
import time
import os
import sys
# Gather every announcement-page link from the per-company CSV files stored in
# the 10companies folder.
df = pd.read_csv('D:/anaconda/新能源行业.csv', encoding='gbk')
# The working directory is switched here; relative paths used afterwards
# resolve inside this folder.
os.chdir('D:/anaconda/10companies')
Name_List = os.listdir()
links = []
f_names = []
# Alternative source for the same data: links = df['link']; f_names = df['f_name']
Tlink = []
for Name in Name_List:
    # The folder also receives non-input files (the final_links CSV and the
    # downloaded reports are written into it), so only CSV files are read.
    if not Name.lower().endswith('.csv'):
        continue
    Dir = 'D:/anaconda/10companies/%s' % Name
    tempD = pd.read_csv(Dir, encoding='utf-8')
    # Skip CSV files that are not link lists (no 'link' column).
    if 'link' not in tempD.columns:
        continue
    links.extend(tempD['link'])
print(links)
# def get_PDF_url(url):
# r = requests.get(url);r.encoding = 'utf-8'; html = r.text
# r.close() # HTML content has been fetched, so close the connection
# p = re.compile('<a href=(.*?)\s.*?>(.*?)</a>', re.DOTALL)
# a = p.search(html) # the first <a> is the target tag, so search() is enough
# if a is None:
# Warning('没有找到下载链接。请手动检查链接:%s' % url)
# return()
# else:
# href = a.group(1); fname = a.group(2).strip()
# href = r.url[:26] + href # 形成完整的链接
# return((href,fname))
def get_PDF_url(url):
    """Fetch an announcement page and return its first anchor as a download link.

    Args:
        url: Address of the page to fetch.

    Returns:
        A tuple ``(href, fname)`` built from the first ``<a href=...>`` tag in
        the page: ``href`` is the tag's href value prefixed with the first 26
        characters of the final request URL, ``fname`` is the stripped anchor
        text. An empty tuple ``()`` is returned (and a warning is issued)
        when the page contains no such tag, so callers must check the result
        before unpacking it.
    """
    import warnings  # local import so the block does not depend on file-level imports

    r = requests.get(url)
    try:
        r.encoding = 'utf-8'
        html = r.text
    finally:
        # The body has been read (or reading failed); release the connection.
        r.close()

    # Raw string so that `\s` is a regex escape, not a string escape.
    p = re.compile(r'<a href=(.*?)\s.*?>(.*?)</a>', re.DOTALL)
    # Only the first anchor is examined.
    a = p.search(html)
    if a is None:
        # warnings.warn actually emits the message; constructing Warning(...)
        # alone has no effect.
        warnings.warn('没有找到下载链接。请手动检查链接:%s' % url)
        return ()

    href = a.group(1)
    fname = a.group(2).strip()
    # Prefix the href with the first 26 characters of the final URL; for
    # addresses starting with 'http://news.windin.com/ns/' that is exactly
    # this prefix. NOTE(review): other hosts would need a different length.
    href = r.url[:26] + href
    return (href, fname)
# Resolve every announcement page to its download link and file name, then
# save the result so the download step can be re-run without re-scraping.
hrefs = []
fnames = []
for link in links:
    result = get_PDF_url(link)
    # get_PDF_url returns an empty tuple when no download link is found;
    # unpacking that directly would raise ValueError and abort the whole run.
    if not result:
        continue
    href, fname = result
    hrefs.append(href)
    fnames.append(fname)
df_final_links = pd.DataFrame({'href': hrefs, 'fname': fnames})
df_final_links.to_csv('D:/anaconda/10companies/final_links新能源.csv')
# Download every file listed in the saved final-links CSV into the current
# working directory, pausing between requests.
import warnings

df_final_links = pd.read_csv('D:/anaconda/10companies/final_links新能源.csv')
f_names = df_final_links['fname']
hrefs = df_final_links['href']
for href, f_name in zip(hrefs, f_names):
    r = requests.get(href, allow_redirects=True)
    try:
        if r.ok:
            # The context manager guarantees the file is flushed and closed.
            with open('%s' % f_name, 'wb') as out_file:
                out_file.write(r.content)
        else:
            # Do not save an error page under the report's file name.
            warnings.warn('HTTP %s for %s; file not saved' % (r.status_code, href))
    finally:
        # Release the connection before sleeping rather than after.
        r.close()
    # Pause between downloads (presumably to avoid hammering the server).
    time.sleep(10)
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
%matplotlib inline
# Line chart of total operating revenue per year, one line per company.
# A CJK-capable font is selected because all labels are Chinese.
rcParams['font.family'] = 'simhei'
df = pd.read_excel(r"C:\Users\john\Desktop\工作簿1.xls")
print(df)
# (column name, line colour) for each company. Each column is listed once:
# '鸿达兴业' used to be plotted a second time in purple on top of the blue
# line, which only produced a duplicate legend entry.
company_colors = [
    ('孚能科技', 'r'),
    ('亿华通', 'y'),
    ('鸿达兴业', 'blue'),
    ('方正电机', 'green'),
    ('卧龙电驱', 'brown'),
    ('汉中精机', 'black'),
    ('鹏辉能源', 'pink'),
    ('宁德时代', 'grey'),
    ('奥特迅', 'violet'),
]
for company, color in company_colors:
    plt.plot(df["年份"], df[company], label=company, linewidth=1, color=color, markersize=12)
plt.xlabel("年份")
plt.ylabel('营业总收入')
plt.title("营业收入走势图")
plt.legend()
plt.grid()
plt.show()
# Output of print(df) above:
#
#    Unnamed: 0    年份    孚能科技      亿华通     鸿达兴业     方正电机     卧龙电驱    汉中精机  \
# 0         NaN  2021  381.93   776.40  3206.85   717.22  3214.21  452.34
# 1         NaN  2020  306.79  1249.60  4436.05  1313.14  2124.16  307.14
# 2         NaN  2019  174.22  2651.12  4396.26  1998.02  2163.44  205.14
# 3         NaN  2018  128.15  1887.77  3515.81  1979.86  1576.22  285.88
# 4         NaN  2017  135.98  1746.53  2496.16  1606.25  1694.23  253.75
# 5         NaN  2016  118.92  1864.55  2042.11   775.64  1877.15  188.32
# 6         NaN  2015  104.10  1503.67  1347.65    89.44  1436.44  145.63
# 7         NaN  2014   55.44  1377.12   364.77   154.96   243.15  185.69
# 8         NaN  2013   30.15  1369.55   536.15    15.15  1834.71  136.14
# 9         NaN  2012   35.12   124.53   775.16   863.20  1937.14  286.34
#
#       鹏辉能源     宁德时代  奥特迅
# 0   831.06  4236.15  NaN
# 1  1006.43  2634.21  NaN
# 2  1981.97  2009.12  NaN
# 3  1381.97  1987.46  NaN
# 4   668.72  1889.45  NaN
# 5   973.12  1456.78  NaN
# 6   362.78  2456.64  NaN
# 7   563.21  1872.45  NaN
# 8  1310.12  1896.85  NaN
# 9   215.14  2435.12  NaN
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib import rcParams
# One horizontal bar chart per year comparing operating cost across companies.
# The five charts were previously five copy-pasted cells differing only in
# the row position and the year in the title.
rcParams['font.family'] = 'simhei'
data = pd.read_excel('E:/anaconda/10companies/工作簿2.xlsx').set_index(['date'])
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']
# Bar labels, in the column order of the workbook. The list length must match
# the number of columns in `data`.
# NOTE(review): '鸿达兴业' appears twice -- verify against the workbook columns.
companies_name = ['孚能科技','亿华通','鸿达兴业','鸿达兴业','方正电机','卧龙电驱', '汉中精机', '鹏辉能源', '宁德时代', '奥特迅']
# Rows are addressed by position: row 0 is titled 2017, row 4 is titled 2021.
for row_pos, year in enumerate(range(2017, 2022)):
    data0 = data.iloc[row_pos]
    plt.barh(range(len(data0)), data0, tick_label=companies_name, color='#6699CC')
    plt.title('%d年营业成本对比(单位:万元)' % year)
    plt.show()