Scraping the data:
```
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup  # HTML parsing / data extraction
import re  # regular expressions for text matching
import urllib.request, urllib.error  # build requests and fetch page data (used by askURL below)
import xlwt  # write the results to an Excel .xls file
from selenium import webdriver
import time
# from Selenium4R import Chrome  # cached-driver helper, only needed for the commented-out variant below
# import sqlite3  # SQLite storage (not used)
findLink = re.compile(r'<span class="job-name"><a href="(.*?)" target="_blank">')  # job detail-page link
findImgSrc = re.compile(r'<div class="company-text">(.*)</div>', re.S)  # company info block (company name is extracted from it later)
findTitle = re.compile(r'<em class="vline"></em>(.*)</p>')  # education requirement
findRating = re.compile(r'</a><em class="vline"></em>(.*)<em class="vline"></em>')  # company headcount range (unused)
findJudge = re.compile(r'<div class="info-desc">(.*)</div>')  # benefits (unused)
findInq = re.compile(r'<span class="red">(.*)</span>')  # salary
findarea = re.compile(r'<span class="job-area">(.*)</span>')  # work location
findname = re.compile(r'<span class="job-name">(.*)</span>')  # job title
findjyan = re.compile(r'<p>(.*)<em class="vline"></em>')  # experience requirement
# re.S: without it, "." never matches a newline, so patterns are effectively matched line by line;
#   with re.S the whole string is treated as one block and patterns can span lines.
# The r prefix marks a raw string literal, so backslashes are not treated as escape characters.
# re.sub(pattern, repl, string, count=0, flags=0) replaces every match of `pattern` in `string`
#   with `repl` (which may also be a function).
# str.strip() removes the given characters (whitespace/newlines by default) from both ends of a
#   string only; it never touches characters in the middle.
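# A quick illustration of the points above (illustrative snippets only, not used by the scraper):
# >>> import re
# >>> re.findall(r'<p>(.*)</p>', '<p>a\nb</p>')          # []       -- "." stops at the newline
# >>> re.findall(r'<p>(.*)</p>', '<p>a\nb</p>', re.S)    # ['a\nb'] -- re.S lets "." cross lines
# >>> re.sub(r'\d+', '#', 'room 42, floor 7')            # 'room #, floor #'
# >>> '  15K-30K  '.strip()                              # '15K-30K'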
def main():
    baseurl = "https://www.zhipin.com/c101280100/?query=%E7%88%AC%E8%99%AB&page="  # listing URL to crawl
    # 1. Crawl the pages
    datalist = getData(baseurl)
    time_stamp = time.strftime('%m%d%H%M%S', time.localtime(time.time()))
    savepath = "boss直聘爬蟲崗" + time_stamp + ".xls"  # new .xls file in the current directory
    # dbpath = "movie.db"  # alternative: a SQLite database in the current directory
    # 3. Save the data (two storage options; only one is needed)
    saveData(datalist, savepath)
    # saveData2DB(datalist, dbpath)
# Crawl the listing pages
def getData(baseurl):
    datalist = []  # holds the parsed job postings
    for i in range(0, 3):  # fetch the first 3 result pages
        url = baseurl + str(i + 1) + '&ka=page-' + str(i + 1)
        # html = askURL(url)  # plain urllib fetch, kept for reference (the page needs JS, so Selenium is used)
        # driver = Chrome(cache_path=r"E:\Temp")  # Selenium4R variant
        driver = webdriver.Chrome(r'C:\Program Files\Google\Chrome\Application\chromedriver.exe')  # path to a local chromedriver
# url = "https://www.zhipin.com/c101020100/e_102/?query=web%E5%89%8D%E7%AB%AF&page=1&ka=page-1"
driver.get(url)
time.sleep(8)
# js = "window.open("+url+")"
# driver.execute_script(js)
html = driver.page_source
        # 2. Parse the page, one job card at a time
        soup = BeautifulSoup(html, "html.parser")
        for item in soup.find_all('div', class_="job-primary"):  # each job card
            data = []  # all fields for one job posting
            item = str(item)
            link = re.findall(findInq, item)[0]  # salary
            linka = re.findall(findTitle, item)[0]  # education requirement
# linka = re.sub('[A-Za-z]',"", linka)
# linka = re.sub('[\s+\.\!\/_,$%^*(+\"|<>]+',"", linka)
regex_str = ".*?([\u4E00-\u9FA5]+).*?"
linka = re.findall(regex_str, linka)
data.append(link)
data.append(linka)
linkb = re.findall(findjyan, item)[0]
data.append(linkb)
linkc = re.findall(findarea, item)[0]
data.append(linkc)
linkd = re.findall(findname, item)[0]
data.append(linkd)
            imgSrc = re.findall(findImgSrc, item)[0]  # the whole company-text block
            # second pass: pull the company name out of that block
            imgSrc = re.findall('target="_blank">(.*)</a></h3>', imgSrc)
            data.append(imgSrc)
datalist.append(data)
return datalist
# Fetch the raw HTML of one URL with urllib (unused fallback; the listing page needs JavaScript rendering)
def askURL(url):
    head = {  # browser-like request headers
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36",
        "cookie": ""
    }
    # the User-Agent tells the server what kind of client is asking (and what content it can accept)
request = urllib.request.Request(url, headers=head)
html = ""
try:
response = urllib.request.urlopen(request)
html = response.read().decode("utf-8")
except urllib.error.URLError as e:
if hasattr(e, "code"):
print(e.code)
if hasattr(e, "reason"):
print(e.reason)
return html
# Save the parsed data to an .xls spreadsheet
def saveData(datalist, savepath):
    print("save.......")
    print(datalist)
    book = xlwt.Workbook(encoding="utf-8", style_compression=0)  # create the workbook
    sheet = book.add_sheet('boss直聘爬蟲崗', cell_overwrite_ok=True)  # create the worksheet
    col = ("薪資", "學歷", "工作經驗", "工作地點", "崗位名稱", "公司名稱")  # same order as the fields appended in getData
    for i in range(0, 6):
        sheet.write(0, i, col[i])  # header row
    for i in range(0, len(datalist)):
        # print("第%d條" % (i + 1))  # debug output
        data = datalist[i]
        for j in range(0, 6):
            sheet.write(i + 1, j, data[j])  # data rows
    book.save(savepath)  # write the file to disk
if __name__ == "__main__":  # program entry point
    # call the main routine
    main()
# init_db("movietest.db")
print("爬取完畢!")
```
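Note: every script here passes the chromedriver path straight to `webdriver.Chrome(...)`, which only works on Selenium 3. A minimal sketch of the equivalent setup on Selenium 4 (the driver path is an assumption, adjust it to your machine):
```
from selenium import webdriver
from selenium.webdriver.chrome.service import Service

# Selenium 4 takes the driver path through a Service object
driver = webdriver.Chrome(service=Service(r"C:\Program Files\Google\Chrome\Application\chromedriver.exe"))
# On Selenium >= 4.6 you can also call webdriver.Chrome() with no arguments and let
# Selenium Manager download a matching driver automatically.
driver.get("https://www.zhipin.com")
driver.quit()
```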
*****
2:
```
from bs4 import BeautifulSoup
from selenium import webdriver
import csv
import time
def fillPostList(postlist, html):
    # parse one rendered listing page and append one row per job card to postlist
    try:
soup = BeautifulSoup(html,"html.parser")
job_all = soup.find_all('div', {"class": "job-primary"})
for job in job_all:
position = job.find('span', {"class": "job-name"}).get_text()
address = job.find('span', {'class': "job-area"}).get_text()
company = job.find('div', {'class': 'company-text'}).find('h3', {'class': "name"}).get_text()
salary = job.find('span', {'class': 'red'}).get_text()
            diploma = job.find('div', {'class': 'job-limit clearfix'}).find('p').get_text()[-2:]    # last two characters: the education level
            experience = job.find('div', {'class': 'job-limit clearfix'}).find('p').get_text()[:-2]  # everything before that: the experience requirement
labels = job.find('a', {'class': 'false-link'}).get_text()
postlist.append([position,address,company,salary,diploma,experience,labels])
except IndexError:
pass
def main():
jobinfo = []
driver = webdriver.Chrome()
url = "https://www.zhipin.com/c101280100/?query=%E7%88%AC%E8%99%AB&page=1&ka=page-1"
driver.get(url)
time.sleep(8)
html = driver.page_source
fillPostList(jobinfo,html)
    # write the jobinfo list to job.csv
headers = ["職位","工作地址","公司全稱","薪水","學歷","工作經驗","行業標簽"]
    with open('job.csv', 'w', newline='', encoding='utf-8') as f:
f_csv = csv.writer(f)
f_csv.writerow(headers)
f_csv.writerows(jobinfo)
driver.quit()
main()
```
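A quick way to sanity-check the job.csv written by this script (a small sketch using pandas; assumes the file produced above):
```
import pandas as pd

df = pd.read_csv('job.csv', encoding='utf-8')
print(df.shape)   # one row per job card, 7 columns
print(df.head())  # first few postings with the 7 columns written above
```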
3:
```
# Boss直聘
from bs4 import BeautifulSoup
import requests
import xlwt
from selenium import webdriver
from lxml import etree
import time
begin = int(input("輸入起始頁:"))
end = int(input("輸入終止頁:"))
url = "https://www.zhipin.com/c101280100/?query=%E7%88%AC%E8%99%AB&page=1&ka=page-1"
base_url="https://www.zhipin.com"
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4557.4 Safari/537.36',
'cookie': '__g=-; Hm_lvt_194df3105ad7148dcf2b98a91b5e727a=1628342274,1628476062,1628559147; lastCity=100010000; __c=1628559147; __l=l=%2Fwww.zhipin.com%2Fc100010000%2F%3Fpage%3D1%26ka%3Dpage-1&r=&g=&s=3&friend_source=0&s=3&friend_source=0; __a=51751789.1628342272.1628476062.1628559147.80.3.2.80; Hm_lpvt_194df3105ad7148dcf2b98a91b5e727a=1628559375; __zp_stoken__=44fccEA1HA2tYaygfIi87Y39AOV8QMShFLTJsCThyVHN4TQUcEithWCZrdEBRdGB%2BT3s1cRw9fggUJQYnIEMHSE0rHHpfbE0yGiREN2IMbHcNX3s6dg5iIzgCdHxZREcDf1glTGc4AHw%2FcjoH',
'referer': 'https://www.zhipin.com/c100010000/?page=2&ka=page-2'
}
names = []
locations = []
salarys = []
requirements = []
educations = []
companys = []
links = []
items = []
for page in range(begin, end + 1):
    # build the URL for this page; the fixed `url` defined above always points at page 1,
    # so the loop would otherwise fetch the same page repeatedly
    page_url = base_url + "/c101280100/?query=%E7%88%AC%E8%99%AB&page=" + str(page) + "&ka=page-" + str(page)
    # response = requests.get(page_url, headers=headers)  # plain requests fetch; the page needs JS, so Selenium is used instead
    driver = webdriver.Chrome(r'C:\Program Files\Google\Chrome\Application\chromedriver.exe')  # path to a local chromedriver
    driver.get(page_url)
time.sleep(8)
# js = "window.open("+url+")"
# driver.execute_script(js)
html = driver.page_source
#print(html)
root=etree.HTML(html)
    name = root.xpath('//*[@id="main"]/div/div[2]/ul/li/div/div[1]/div[1]/div/div[1]/span[1]/a/text()')          # job title
    names.extend(name)
    location = root.xpath('//*[@id="main"]/div/div[2]/ul/li/div/div[1]/div[1]/div/div[1]/span[2]/span/text()')   # work location
    locations.extend(location)
    salary = root.xpath('//*[@id="main"]/div/div[2]/ul/li/div/div[1]/div[1]/div/div[2]/span/text()')             # salary
    salarys.extend(salary)
    requirement = root.xpath('//*[@id="main"]/div/div[2]/ul/li/div/div[1]/div[1]/div/div[2]/p/text()[1]')        # experience requirement
    requirements.extend(requirement)
    education = root.xpath('//*[@id="main"]/div/div[2]/ul/li/div/div[1]/div[1]/div/div[2]/p/text()[2]')          # education requirement
    educations.extend(education)
    company = root.xpath('//*[@id="main"]/div/div[2]/ul/li/div/div[1]/div[2]/div/h3/a/text()')                   # company name
    companys.extend(company)
    link = root.xpath('//*[@id="main"]/div/div[2]/ul/li/div/div[1]/div[1]/div/div[1]/span[1]/a/@href')           # relative detail-page link
    for i in range(0, len(link)):
        link[i] = base_url + link[i]  # make the link absolute
    links.extend(link)
    driver.quit()  # close this iteration's browser instance
items.append(names)
items.append(locations)
items.append(salarys)
items.append(requirements)
items.append(educations)
items.append(companys)
items.append(links)
#print(items)
book = xlwt.Workbook(encoding='utf-8')
sheet = book.add_sheet('items')
head = ['職位名稱', '工作地點', '薪水', '工作經驗', '學歷', '公司','詳情鏈接']
for i in range(0, 7):
sheet.write(0, i, head[i])
for i in range(0, 7):
a = items[i]
for j in range(len(a)):
sheet.write(j + 1, i, a[j])
book.save('Boss直聘12.xls')
```
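The XPath expressions above address the job cards purely by position, which breaks as soon as the markup shifts. A small self-contained sketch of the same extraction on an invented snippet of similar markup, using attribute-based paths instead:
```
from lxml import etree

snippet = """
<div id="main"><ul><li><div class="job-primary">
  <span class="job-name"><a href="/job_detail/abc123.html">爬蟲工程師</a></span>
  <span class="job-area">上海·浦東新區</span>
  <span class="red">15-30K</span>
</div></li></ul></div>
"""
root = etree.HTML(snippet)
print(root.xpath('//span[@class="job-name"]/a/text()'))   # ['爬蟲工程師']
print(root.xpath('//span[@class="job-area"]/text()'))     # ['上海·浦東新區']
print(root.xpath('//span[@class="job-name"]/a/@href'))    # ['/job_detail/abc123.html']
```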
4:
```
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup  # HTML parsing / data extraction
from selenium import webdriver
import time
import csv
# unused in this version, kept commented out for reference:
# import re
# import urllib.request, urllib.error
# import xlwt
# from Selenium4R import Chrome
# import sqlite3
begin = int(input("輸入起始頁:"))
end = int(input("輸入終止頁:"))
def main():
    baseurl = "https://www.zhipin.com/c101280100/?query=%E7%88%AC%E8%99%AB&page="  # listing URL to crawl
    # 1. Crawl the pages
    datalist = getData(baseurl)
    time_stamp = time.strftime('%m%d%H%M%S', time.localtime(time.time()))
    savepath = "boss直聘爬蟲崗" + time_stamp + ".csv"  # new .csv file in the current directory
    # 3. Save the data
    saveData(datalist, savepath)
# Crawl the listing pages
def getData(baseurl):
    datalist = []  # holds the parsed job postings
    for i in range(begin, end + 1):  # one iteration per requested result page
        url = baseurl + str(i) + '&ka=page-' + str(i)
        driver = webdriver.Chrome(r'C:\Program Files\Google\Chrome\Application\chromedriver.exe')  # path to a local chromedriver
driver.get(url)
time.sleep(8)
html = driver.page_source
        # 2. Parse the page, one job card at a time
        soup = BeautifulSoup(html, "html.parser")
        for job in soup.find_all('div', {"class": "job-primary"}):  # each job card
            data = []  # all fields for one job posting
position = job.find('span', {"class": "job-name"}).get_text()
address = job.find('span', {'class': "job-area"}).get_text()
company = job.find('div', {'class': 'company-text'}).find('h3', {'class': "name"}).get_text()
salary = job.find('span', {'class': 'red'}).get_text()
            diploma = job.find('div', {'class': 'job-limit clearfix'}).find('p').get_text()[-2:]    # last two characters: the education level
            experience = job.find('div', {'class': 'job-limit clearfix'}).find('p').get_text()[:-2]  # everything before that: the experience requirement
labels = job.find('a', {'class': 'false-link'}).get_text()
# company_status_result = re.search(r'<em class="vline"/>(.*?)<em class="vline"/>', job)[0]
# if company_status_result:
# company_status = company_status_result.group(1)
# else:
# company_status = '無信息'
# data.append([position,address,company,salary,diploma,experience,labels])
data.append(position)
data.append(address)
data.append(company)
data.append(salary)
data.append(diploma)
data.append(experience)
data.append(labels)
datalist.append(data)
return datalist
# Save the parsed data to a csv file
def saveData(datalist,savepath):
print("save.......")
print(datalist)
# book = csv.Workbook(encoding="utf-8",style_compression=0) #創建workbook對象
# sheet = book.add_sheet('豆瓣電影Top250', cell_overwrite_ok=True) #創建工作表
# f = open(savepath,'w',encoding='utf-8',newline = '')
# csv_writer = csv.writer(f)
col = ("崗位名稱","招聘地點","企業名","薪資","學歷","經驗要求","類型")
headers = ["職位","工作地址","公司全稱","薪水","學歷","工作經驗","行業標簽"]
# csv_writer.writerow(headers)
with open(savepath,'w',encoding='utf-8',newline = '') as f:
f_csv = csv.writer(f)
f_csv.writerow(headers)
f_csv.writerows(datalist)
# for i in range(0,7):
# # csv_writer.writerow(0,i,col[i]) #列名
# for i in range(len(datalist)):
# # print("第%d條" %(i+1)) #輸出語句,用來測試
# data = datalist[i]
# for j in range(0,7):
# csv_writer.writerows(data[j])
# csv_writer.writerow(i+1,j,data[j]) #數據
# book.save(savepath) #保存
if __name__ == "__main__":  # program entry point
    # call the main routine
    main()
# init_db("movietest.db")
print("爬取完畢!")
```
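One practical note on the CSV written above: Excel on Windows often mis-detects plain UTF-8 and shows garbled Chinese. A small tweak, if that matters, is to write with the utf-8-sig codec so a BOM is included (sketch with made-up rows):
```
import csv

rows = [["職位", "薪水"], ["爬蟲工程師", "15-30K"]]  # illustrative rows only
with open("job_excel_friendly.csv", "w", encoding="utf-8-sig", newline="") as f:
    csv.writer(f).writerows(rows)  # the BOM lets Excel pick up UTF-8 automatically
```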
6:
```
from pyspider.libs.base_handler import *
import pymysql
import random
import time
import re
count = 0
class Handler(BaseHandler):
    # add request headers, otherwise the site returns 403
crawl_config = {'headers': {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}}
def __init__(self):
        # connect to the MySQL database
self.db = pymysql.connect(host='127.0.0.1', user='root', password='774110919', port=3306, db='boss_job', charset='utf8mb4')
def add_Mysql(self, id, job_title, job_salary, job_city, job_experience, job_education, company_name, company_type, company_status, company_people):
        # write one record into the database
try:
cursor = self.db.cursor()
sql = 'insert into job(id, job_title, job_salary, job_city, job_experience, job_education, company_name, company_type, company_status, company_people) values ("%d", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s")' % (id, job_title, job_salary, job_city, job_experience, job_education, company_name, company_type, company_status, company_people);
print(sql)
cursor.execute(sql)
print(cursor.lastrowid)
self.db.commit()
except Exception as e:
print(e)
self.db.rollback()
@every(minutes=24 * 60)
def on_start(self):
        # pyspider defaults to plain HTTP; HTTPS requests need validate_cert=False, otherwise a 599/SSL error is raised
self.crawl('https://www.zhipin.com/job_detail/?query=%E6%95%B0%E6%8D%AE%E5%88%86%E6%9E%90&scity=100010000&industry=&position=', callback=self.index_page, validate_cert=False)
@config(age=10 * 24 * 60 * 60)
def index_page(self, response):
time.sleep(random.randint(2, 5))
for i in response.doc('li > div').items():
            # module-level counter used as the record id
            global count
            count += 1
            # job title
            job_title = i('.job-title').text()
            print(job_title)
            # salary
            job_salary = i('.red').text()
            print(job_salary)
            # job location
city_result = re.search('(.*?)<em class=', i('.info-primary > p').html())
job_city = city_result.group(1).split(' ')[0]
print(job_city)
            # experience requirement
experience_result = re.search('<em class="vline"/>(.*?)<em class="vline"/>', i('.info-primary > p').html())
job_experience = experience_result.group(1)
print(job_experience)
            # education requirement
job_education = i('.info-primary > p').text().replace(' ', '').replace(city_result.group(1).replace(' ', ''), '').replace(experience_result.group(1).replace(' ', ''),'')
print(job_education)
            # company name
            company_name = i('.info-company a').text()
            print(company_name)
            # company type (industry)
company_type_result = re.search('(.*?)<em class=', i('.info-company p').html())
company_type = company_type_result.group(1)
print(company_type)
            # company funding status
company_status_result = re.search('<em class="vline"/>(.*?)<em class="vline"/>', i('.info-company p').html())
if company_status_result:
company_status = company_status_result.group(1)
else:
company_status = '無信息'
print(company_status)
            # company size
company_people = i('.info-company p').text().replace(company_type, '').replace(company_status,'')
print(company_people + '\n')
            # write the record into MySQL
            self.add_Mysql(count, job_title, job_salary, job_city, job_experience, job_education, company_name, company_type, company_status, company_people)
        # queue the next results page
next = response.doc('.next').attr.href
if next != 'javascript:;':
self.crawl(next, callback=self.index_page, validate_cert=False)
else:
print("The Work is Done")
        # detail-page crawling is left disabled because of the request limit
#for each in response.doc('.name > a').items():
#url = each.attr.href
#self.crawl(each.attr.href, callback=self.detail_page, validate_cert=False)
@config(priority=2)
def detail_page(self, response):
        # detail-page parsing (unused because of the request limit)
message_job = response.doc('div > .info-primary > p').text()
city_result = re.findall('城市:(.*?)經驗', message_job)
experience_result = re.findall('經驗:(.*?)學歷', message_job)
education_result = re.findall('學歷:(.*)', message_job)
message_company = response.doc('.info-company > p').text().replace(response.doc('.info-company > p > a').text(),'')
status_result = re.findall('(.*?)\d', message_company.split(' ')[0])
people_result = message_company.split(' ')[0].replace(status_result[0], '')
return {
"job_title": response.doc('h1').text(),
"job_salary": response.doc('.info-primary .badge').text(),
"job_city": city_result[0],
"job_experience": experience_result[0],
"job_education": education_result[0],
"job_skills": response.doc('.info-primary > .job-tags > span').text(),
"job_detail": response.doc('div').filter('.text').eq(0).text().replace('\n', ''),
"company_name": response.doc('.info-company > .name > a').text(),
"company_status": status_result[0],
"company_people": people_result,
"company_type": response.doc('.info-company > p > a').text(),
}
```
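add_Mysql above builds the INSERT statement with % string formatting, so any quote inside a field breaks the SQL. A hedged alternative (same boss_job.job table assumed; the values below are made up) is to let pymysql fill placeholders itself:
```
import pymysql

db = pymysql.connect(host='127.0.0.1', user='root', password='774110919',
                     port=3306, db='boss_job', charset='utf8mb4')
sql = ("insert into job(id, job_title, job_salary, job_city, job_experience, job_education, "
       "company_name, company_type, company_status, company_people) "
       "values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)")
with db.cursor() as cursor:
    # pymysql escapes each value, so quotes and injection attempts are handled safely
    cursor.execute(sql, (1, '數據分析', '15K-25K', '北京', '1-3年', '本科',
                         '某某科技', '互聯網', 'A輪', '100-499人'))
db.commit()
```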
*****
Data visualization:
```
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.pylab import mpl
import matplotlib.font_manager as fm
dfs = pd.read_csv('boss直聘爬蟲崗1217135602.csv',encoding='utf-8')
data_df = pd.DataFrame(dfs)
# df['prince'].fillna(df['prince'].mean())
# print("\n查看是否有缺失值\n", data_df.isnull().sum())
data_df_del_empty = data_df.dropna(subset=['職位'], axis=0)
data_df_python_keyword = data_df_del_empty.loc[data_df_del_empty['職位'].str.contains('爬蟲|python|Python')].copy()  # rows whose 職位 mentions 爬蟲/python; copy so insert() below does not warn
# print(data_df_python_keyword)  # inspect the filtered rows
# minimum salary of the advertised range (the part before the '-')
data_df_python_keyword_salary = data_df_python_keyword['薪水'].str.split('-', expand=True)[0] + 'K'
# print(data_df_python_keyword_salary)  # minimum salary
# insert the result as a new column named '最小薪資' at position 7 of the DataFrame
data_df_python_keyword.insert(7, '最小薪資', data_df_python_keyword_salary)
# print(data_df_python_keyword['學歷'])
Fre_f=pd.DataFrame(dfs["學歷"].value_counts())
Fre_x=data_df_python_keyword["最小薪資"].value_counts()
def Bar_1(data, title, is_a):
    # set a global Chinese-capable font
    my_font = fm.FontProperties(fname="C:/Windows/Fonts/msyhl.ttc")
    mpl.rcParams['font.sans-serif'] = my_font.get_name()
    mpl.rcParams["axes.unicode_minus"] = False
    # draw the bar chart
    # figure size
    p = plt.figure(figsize=(20, 8), dpi=300)
    ax = p.add_subplot(1, 1, 1)  # a single subplot in a 1x1 grid
    # hide the top and right spines of the subplot
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    x = list(data.index)
    if (is_a == 1):
        y = list(data)            # data passed in as a Series (value_counts result)
    else:
        y = list(data['學歷'])     # data passed in as a one-column DataFrame
plt.bar(range(len(x)),y,color="#4CAF50",width = 0.5)
plt.xticks(range(len(x)),x,font=my_font,fontsize=8,color="red")
plt.yticks(font=my_font,fontsize=8,color="#006400")
    # background grid lines
    plt.grid(axis="y", linestyle="--", color="#FFA500", alpha=0.5)
    # annotate each bar below (arguments: text, annotated point xy, text position xytext)
    color_list = ["#4B0082", "#2F4F4F", "#32CD32", "#808000", "#B22222", "#808000"]  # palette, currently unused
# plt.show()
# print(y[0])
# return;
for i in range(len(y)):
plt.annotate(y[i],xy=(i,y[i]),xytext=(i,y[i]),font=my_font,fontsize=8,color="#808000")
    # x and y axis labels
    plt.xlabel("\n學歷", font=my_font, fontsize=20)
    plt.ylabel("招聘人數\n", font=my_font, fontsize=20)
    # chart title
    plt.title(title, font=my_font, fontsize=15, color="#FFD700")
# plt.savefig("C:/Users/HUAWEI/Desktop/大數據就業與學歷關系直方圖.png")
plt.show()
def Bie_1(data, title, is_a):
    plt.figure(figsize=(10, 5), dpi=150)  # figure size
    x = list(data.index)
    if (is_a == 1):
        y = list(data)            # Series input
    else:
        y = list(data['學歷'])     # one-column DataFrame input
    labels = x   # slice labels
    sizes = y    # slice values
    colors = ['red', 'yellowgreen', 'lightskyblue', 'yellow', 'blue']  # per-slice colours (defined but not passed to plt.pie below)
    explode = (0.1, 0.05, 0.05, 0.05, 1.2)  # offset of each slice from the centre (defined but not passed to plt.pie below)
    patches, text1, text2 = plt.pie(sizes,
                                    labels=labels,
                                    autopct='%3.2f%%',  # keep two decimal places on the percentage labels
                                    shadow=False,       # no shadow
                                    startangle=0,       # counter-clockwise starting angle
                                    pctdistance=0.6)    # distance of the percentage text from the centre (fraction of the radius)
    # patches: the wedges; text1: the labels outside the pie; text2: the percentage texts inside it
    # keep x and y scales equal so the pie is drawn as a circle
plt.axis('equal')
    # legend
    my_font1 = fm.FontProperties(fname="C:/Windows/Fonts/msyhl.ttc", size=10)
    plt.legend(prop=my_font1)
    # title
    my_font2 = fm.FontProperties(fname="C:/Windows/Fonts/msyhl.ttc", size=20)
    plt.title(title, font=my_font2, color='#32CD32')
plt.show()
Bie_1(Fre_f,"爬蟲就業與學歷關系",0)
# Bar_1(Fre_x,"爬蟲就業與學歷關系",1)
# annotate adds a text note to an axes, e.g. the count printed on top of each bar
# bar draws a bar chart
# value_counts() counts how many times each distinct value appears in a column; those counts drive both charts above
```
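A minimal sketch of the value_counts → bar → annotate pipeline described in the comments above, on a tiny hand-made Series (the education values are made up for illustration):
```
import pandas as pd
import matplotlib.pyplot as plt

edu = pd.Series(["本科", "本科", "大專", "碩士", "本科", "大專"])  # illustrative data only
counts = edu.value_counts()  # 本科: 3, 大專: 2, 碩士: 1

plt.bar(range(len(counts)), counts.values, color="#4CAF50", width=0.5)
plt.xticks(range(len(counts)), counts.index)  # set a Chinese-capable font as in Bar_1 if the labels show as boxes
for i, v in enumerate(counts.values):
    plt.annotate(str(v), xy=(i, v), xytext=(i, v))  # put the count on top of each bar
plt.show()
```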