案例-爬小說 · python筆記

~~~py import requests import re,sys,time from bs4 import BeautifulSoup head={ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 UBrowser/6.1.2107.204 Safari/537.36' } class downloader(): def __init__(self): self.server='http://book.zongheng.com' self.target='http://book.zongheng.com/showchapter/929099.html' self.list=[] # 獲取下載鏈接 def get_download_url(self): # target='http://book.zongheng.com/showchapter/929099.html' req=requests.get(url=self.target) bf=BeautifulSoup(req.text,"html.parser") texts=bf.find_all('ul',class_='chapter-list clearfix') texts=BeautifulSoup(str(texts[1]),"html.parser").find_all('a') for a in texts: self.list.append({ 'name':a.string, 'url':a.get('href') }) # 獲取每章內容 def get_context(self,urls): req=requests.get(url=urls) bf=BeautifulSoup(req.text,"html.parser") texts=bf.find_all('div',class_='content') new_text=re.sub('[　| ]+','\n\n',texts[0].text) return new_text # 將爬取的數據寫入文件 def write_file(self,name,path,context): write_flag = True with open(path,'a',encoding='utf-8') as f: f.write(name+'\n') f.writelines(context) f.write('\n\n') if __name__=='__main__': dl=downloader() dl.get_download_url() i=0 print(len(dl.list)) for n in dl.list: i += 1 a=dl.get_context(n['url']) dl.write_file(n['name'],'不知道什么小說.txt',dl.get_context(n['url'])) sys.stdout.write("\r") sys.stdout.write(" 已下載:%.1f%%" % float(i/len(dl.list)*100)) sys.stdout.flush() ~~~