from bs4 import BeautifulSoup#导入模块
import requests
import re  # regular expressions, used to normalise whitespace below

# Fetch the page. The timeout keeps the script from hanging indefinitely on an
# unresponsive server, and raise_for_status() stops us from parsing an HTTP
# error page as if it were the real content.
r = requests.get('http://www.jxufe.edu.cn', timeout=10)
r.raise_for_status()
html = r.text  # full page source, decoded to str by requests

# Name the parser explicitly so the parse result does not depend on which
# optional parsers happen to be installed (and to avoid bs4's
# "no parser was explicitly specified" warning).
soup = BeautifulSoup(html, 'html.parser')
content = soup.get_text()  # text content only, markup stripped
print(soup.prettify())

# Collapse every run of whitespace (spaces, tabs, newlines) into one space.
# The raw string avoids the invalid "\s" escape in a normal string literal;
# the "+" is what turns a run of several whitespace characters into a single
# space instead of one space per character.
txt = re.sub(r"\s+", " ", content)
# See the code above for details.