Scraping the data:

```
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup        # HTML parsing / data extraction
import re                            # regular expressions for text matching
import urllib.request, urllib.error  # build the URL and fetch the page
import xlwt                          # Excel output
from Selenium4R import Chrome        # only needed for the commented-out Chrome(cache_path=...) line
from selenium import webdriver
import time
#import sqlite3                      # SQLite storage (unused)

# Compile the regex patterns for each field.
findLink = re.compile(r'<span class="job-name"><a href="(.*?)" target="_blank">')   # job detail link
findImgSrc = re.compile(r'<div class="company-text">(.*)</div>', re.S)              # company name
findTitle = re.compile(r'<em class="vline"></em>(.*)</p>')                           # education requirement
findRating = re.compile(r'</a><em class="vline"></em>(.*)<em class="vline"></em>')   # company size
findJudge = re.compile(r'<div class="info-desc">(.*)</div>')                         # benefits
findInq = re.compile(r'<span class="red">(.*)</span>')                               # salary
findarea = re.compile(r'<span class="job-area">(.*)</span>')                         # job location
findname = re.compile(r'<span class="job-name">(.*)</span>')                         # job title
findjyan = re.compile(r'<p>(.*)<em class="vline"></em>')                             # work-experience requirement
# findBd = re.compile(r'<p class="">(.*?)</p>', re.S)
# findImgSrc = re.compile(r'<img.*src="(.*?)"', re.S)

# re.S: without it the regex matches line by line and never crosses a newline; with it
# the whole string is treated as one block and "\n" is matched like any other character
# (see the short illustration after the listings).
# The r prefix marks a raw string literal.
# re.sub replaces every match in a string; its main parameters are pattern (the regex),
# repl (the replacement text, or a function) and string (the text to process).
# str.strip() removes the given characters (whitespace/newlines by default) from both
# ends of a string only; it cannot remove characters from the middle.

def main():
    baseurl = "https://www.zhipin.com/c101280100/?query=%E7%88%AC%E8%99%AB&page="  # base URL to scrape
    # 1. scrape the pages
    datalist = getData(baseurl)
    time_stamp = time.strftime('%m%d%H%M%S', time.localtime(time.time()))
    savepath = "boss直聘爬蟲崗" + time_stamp + ".xls"  # save to a new .xls file in the current directory
    # dbpath = "movie.db"  # or store in a SQLite database
    # 3. save the data (only one of the two storage options is needed)
    saveData(datalist, savepath)
    # saveData2DB(datalist, dbpath)

# scrape the pages
def getData(baseurl):
    datalist = []  # holds the scraped records
    for i in range(0, 3):  # fetch the first 3 result pages
        url = baseurl + str(i + 1) + '&ka=page-' + str(i + 1)
        # html = askURL(url)  # plain urllib fetch (unused; the listing is rendered by JS)
        # driver = Chrome(cache_path=r"E:\Temp")
        driver = webdriver.Chrome(r'C:\Program Files\Google\Chrome\Application\chromedriver.exe')
        # url = "https://www.zhipin.com/c101020100/e_102/?query=web%E5%89%8D%E7%AB%AF&page=1&ka=page-1"
        driver.get(url)
        time.sleep(8)
        # js = "window.open(" + url + ")"
        # driver.execute_script(js)
        html = driver.page_source
        driver.quit()  # close the browser window before the next page
        # 2. parse the rendered page
        soup = BeautifulSoup(html, "html.parser")
        for item in soup.find_all('div', class_="job-primary"):  # one <div> per job card
            data = []  # all fields of one record
            item = str(item)
            link = re.findall(findInq, item)[0]     # salary
            linka = re.findall(findTitle, item)[0]  # education requirement
            # linka = re.sub('[A-Za-z]', "", linka)
            # linka = re.sub('[\s+\.\!\/_,$%^*(+\"|<>]+', "", linka)
            regex_str = ".*?([\u4E00-\u9FA5]+).*?"  # keep only the Chinese characters
            linka = re.findall(regex_str, linka)
            data.append(link)
            data.append(linka)
            linkb = re.findall(findjyan, item)[0]   # work experience
            data.append(linkb)
            linkc = re.findall(findarea, item)[0]   # job location
            data.append(linkc)
            linkd = re.findall(findname, item)[0]   # job title
            data.append(linkd)
            imgSrc = re.findall(findImgSrc, item)[0]
            # imgtest = re.compile(r'<h3 class="name"></h3>', re.S)
            imgSrc = re.findall('target="_blank">(.*)</a></h3>', imgSrc)  # company name
            data.append(imgSrc)
            # leftover Douban-template extraction, unused here:
            # titles = re.findall(findTitle, item)
            # if (len(titles) == 2):
            #     ctitle = titles[0]
            #     data.append(ctitle)
            #     otitle = titles[1].replace("/", "")  # strip the separator characters
            #     data.append(otitle)
            # else:
            #     data.append(titles[0])
            #     data.append(' ')
            # rating = re.findall(findRating, item)[0]
            # data.append(rating)
            # judgeNum = re.findall(findJudge, item)[0]
            # data.append(judgeNum)
            # inq = re.findall(findInq, item)
            # if len(inq) != 0:
            #     inq = inq[0].replace("。", "")
            #     data.append(inq)
            # else:
            #     data.append(" ")
            # bd = re.findall(findBd, item)[0]
            # bd = re.sub('<br(\s+)?/>(\s+)?', "", bd)
            # bd = re.sub('/', "", bd)
            # data.append(bd.strip())
            datalist.append(data)
    return datalist

# fetch the raw HTML of one URL (unused; the listing needs a JS-capable browser)
def askURL(url):
    head = {
        # spoof browser headers; the User-Agent tells the server what kind of
        # browser/machine is asking, i.e. what content it can accept
        "User-Agent": "Mozilla / 5.0(Windows NT 10.0; Win64; x64) AppleWebKit / 537.36(KHTML, like Gecko) Chrome / 80.0.3987.122 Safari / 537.36",
        "cookie": ""
    }
    request = urllib.request.Request(url, headers=head)
    html = ""
    try:
        response = urllib.request.urlopen(request)
        html = response.read().decode("utf-8")
    except urllib.error.URLError as e:
        if hasattr(e, "code"):
            print(e.code)
        if hasattr(e, "reason"):
            print(e.reason)
    return html

# save the data to a spreadsheet
def saveData(datalist, savepath):
    print("save.......")
    print(datalist)
    book = xlwt.Workbook(encoding="utf-8", style_compression=0)      # create the workbook
    sheet = book.add_sheet('豆瓣電影Top250', cell_overwrite_ok=True)  # create the sheet (name left over from the Douban template)
    col = ("電影詳情鏈接", "圖片鏈接", "影片中文名", "影片外國名", "評分", "評價數", "概況", "相關信息")  # header names left over from the Douban template
    for i in range(0, 6):
        sheet.write(0, i, col[i])  # column headers (each record has 6 fields)
    for i in range(len(datalist)):  # was hard-coded range(0, 90)
        # print("第%d條" % (i + 1))  # debug output
        data = datalist[i]
        for j in range(0, 6):
            sheet.write(i + 1, j, data[j])  # data row
    book.save(savepath)  # save

if __name__ == "__main__":  # entry point
    main()
    # init_db("movietest.db")
    print("爬取完畢!")
```

*****

2:

```
from bs4 import BeautifulSoup
from selenium import webdriver
import csv
import time

def fillPostList(postlist, html):
    try:
        soup = BeautifulSoup(html, "html.parser")
        job_all = soup.find_all('div', {"class": "job-primary"})
        for job in job_all:
            position = job.find('span', {"class": "job-name"}).get_text()
            address = job.find('span', {'class': "job-area"}).get_text()
            company = job.find('div', {'class': 'company-text'}).find('h3', {'class': "name"}).get_text()
            salary = job.find('span', {'class': 'red'}).get_text()
            diploma = job.find('div', {'class': 'job-limit clearfix'}).find('p').get_text()[-2:]
            experience = job.find('div', {'class': 'job-limit clearfix'}).find('p').get_text()[:-2]
            labels = job.find('a', {'class': 'false-link'}).get_text()
            postlist.append([position, address, company, salary, diploma, experience, labels])
    except IndexError:
        pass

def main():
    jobinfo = []
    driver = webdriver.Chrome()
    url = "https://www.zhipin.com/c101280100/?query=%E7%88%AC%E8%99%AB&page=1&ka=page-1"
    driver.get(url)
    time.sleep(8)
    html = driver.page_source
    fillPostList(jobinfo, html)
    # write the jobinfo rows to a csv file
    headers = ["職位", "工作地址", "公司全稱", "薪水", "學歷", "工作經驗", "行業標籤"]
    with open('job.csv', 'w', newline='') as f:
        f_csv = csv.writer(f)
        f_csv.writerow(headers)
        f_csv.writerows(jobinfo)
    driver.quit()

main()
```

3:

```
# Boss直聘
from bs4 import BeautifulSoup
import requests
import xlwt
from selenium import webdriver
from lxml import etree
import time

begin = int(input("輸入起始頁:"))
end = int(input("輸入終止頁:"))
url = "https://www.zhipin.com/c101280100/?query=%E7%88%AC%E8%99%AB&page=1&ka=page-1"
base_url = "https://www.zhipin.com"
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4557.4 Safari/537.36',
    'cookie': '__g=-; Hm_lvt_194df3105ad7148dcf2b98a91b5e727a=1628342274,1628476062,1628559147; lastCity=100010000; __c=1628559147; __l=l=%2Fwww.zhipin.com%2Fc100010000%2F%3Fpage%3D1%26ka%3Dpage-1&r=&g=&s=3&friend_source=0&s=3&friend_source=0; __a=51751789.1628342272.1628476062.1628559147.80.3.2.80; Hm_lpvt_194df3105ad7148dcf2b98a91b5e727a=1628559375; __zp_stoken__=44fccEA1HA2tYaygfIi87Y39AOV8QMShFLTJsCThyVHN4TQUcEithWCZrdEBRdGB%2BT3s1cRw9fggUJQYnIEMHSE0rHHpfbE0yGiREN2IMbHcNX3s6dg5iIzgCdHxZREcDf1glTGc4AHw%2FcjoH',
    'referer': 'https://www.zhipin.com/c100010000/?page=2&ka=page-2'
}
names = []
locations = []
salarys = []
requirements = []
educations = []
companys = []
links = []
items = []

for page in range(begin, end + 1):
    param = {
        'page': page
    }
    # response = requests.get(url, params=param, headers=headers)
    driver = webdriver.Chrome(r'C:\Program Files\Google\Chrome\Application\chromedriver.exe')
    # url = "https://www.zhipin.com/c101020100/e_102/?query=web%E5%89%8D%E7%AB%AF&page=1&ka=page-1"
    page_url = base_url + '/c101280100/?query=%E7%88%AC%E8%99%AB&page=' + str(page) + '&ka=page-' + str(page)
    driver.get(page_url)  # the original always loaded the fixed `url`, so every iteration fetched page 1
    time.sleep(8)
    # js = "window.open(" + url + ")"
    # driver.execute_script(js)
    html = driver.page_source
    driver.quit()
    # print(html)
    root = etree.HTML(html)
    name = root.xpath('//*[@id="main"]/div/div[2]/ul/li/div/div[1]/div[1]/div/div[1]/span[1]/a/text()')
    names.extend(name)
    location = root.xpath('//*[@id="main"]/div/div[2]/ul/li/div/div[1]/div[1]/div/div[1]/span[2]/span/text()')
    locations.extend(location)
    salary = root.xpath('//*[@id="main"]/div/div[2]/ul/li/div/div[1]/div[1]/div/div[2]/span/text()')
    salarys.extend(salary)
    requirement = root.xpath('//*[@id="main"]/div/div[2]/ul/li/div/div[1]/div[1]/div/div[2]/p/text()[1]')
    requirements.extend(requirement)
    education = root.xpath('//*[@id="main"]/div/div[2]/ul/li/div/div[1]/div[1]/div/div[2]/p/text()[2]')
    educations.extend(education)
    company = root.xpath('//*[@id="main"]/div/div[2]/ul/li/div/div[1]/div[2]/div/h3/a/text()')
    companys.extend(company)
    link = root.xpath('//*[@id="main"]/div/div[2]/ul/li/div/div[1]/div[1]/div/div[1]/span[1]/a/@href')
    for i in range(0, len(link)):
        link[i] = base_url + link[i]
    links.extend(link)

items.append(names)
items.append(locations)
items.append(salarys)
items.append(requirements)
items.append(educations)
items.append(companys)
items.append(links)
# print(items)

book = xlwt.Workbook(encoding='utf-8')
sheet = book.add_sheet('items')
head = ['職位名稱', '工作地點', '薪水', '工作經驗', '學歷', '公司', '詳情鏈接']
for i in range(0, 7):
    sheet.write(0, i, head[i])
for i in range(0, 7):
    a = items[i]
    for j in range(len(a)):
        sheet.write(j + 1, i, a[j])
book.save('Boss直聘12.xls')
```

4:

```
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup        # HTML parsing / data extraction
import re                            # regular expressions for text matching
import urllib.request, urllib.error  # build the URL and fetch the page
import xlwt                          # Excel output
from Selenium4R import Chrome        # only needed for the commented-out Chrome(cache_path=...) variant
from selenium import webdriver
import time
import csv
#import sqlite3                      # SQLite storage (unused)

begin = int(input("輸入起始頁:"))
end = int(input("輸入終止頁:"))

def main():
    baseurl = "https://www.zhipin.com/c101280100/?query=%E7%88%AC%E8%99%AB&page="  # base URL to scrape
    # 1. scrape the pages
    datalist = getData(baseurl)
    time_stamp = time.strftime('%m%d%H%M%S', time.localtime(time.time()))
    savepath = "boss直聘爬蟲崗" + time_stamp + ".csv"  # save to a new .csv file in the current directory
    # dbpath = "movie.db"  # or store in a SQLite database
    # 3. save the data (only one of the two storage options is needed)
    saveData(datalist, savepath)

# scrape the pages
def getData(baseurl):
    datalist = []  # holds the scraped records
    for i in range(begin, end + 1):
        url = baseurl + str(i) + '&ka=page-' + str(i)
        driver = webdriver.Chrome(r'C:\Program Files\Google\Chrome\Application\chromedriver.exe')
        driver.get(url)
        time.sleep(8)
        html = driver.page_source
        driver.quit()
        # 2. parse the rendered page
        soup = BeautifulSoup(html, "html.parser")
        for job in soup.find_all('div', {"class": "job-primary"}):  # one <div> per job card
            data = []  # all fields of one record
            # item = str(item)
            position = job.find('span', {"class": "job-name"}).get_text()
            address = job.find('span', {'class': "job-area"}).get_text()
            company = job.find('div', {'class': 'company-text'}).find('h3', {'class': "name"}).get_text()
            salary = job.find('span', {'class': 'red'}).get_text()
            diploma = job.find('div', {'class': 'job-limit clearfix'}).find('p').get_text()[-2:]
            experience = job.find('div', {'class': 'job-limit clearfix'}).find('p').get_text()[:-2]
            labels = job.find('a', {'class': 'false-link'}).get_text()
            # company_status_result = re.search(r'<em class="vline"/>(.*?)<em class="vline"/>', job)[0]
            # if company_status_result:
            #     company_status = company_status_result.group(1)
            # else:
            #     company_status = '無信息'
            # data.append([position, address, company, salary, diploma, experience, labels])
            data.append(position)
            data.append(address)
            data.append(company)
            data.append(salary)
            data.append(diploma)
            data.append(experience)
            data.append(labels)
            datalist.append(data)
    return datalist

# save the data to a csv file
def saveData(datalist, savepath):
    print("save.......")
    print(datalist)
    # book = csv.Workbook(encoding="utf-8", style_compression=0)      # leftover xlwt-style attempt
    # sheet = book.add_sheet('豆瓣電影Top250', cell_overwrite_ok=True)
    # f = open(savepath, 'w', encoding='utf-8', newline='')
    # csv_writer = csv.writer(f)
    col = ("崗位名稱", "招聘地點", "企業名", "薪資", "學歷", "經驗要求", "類型")
    headers = ["職位", "工作地址", "公司全稱", "薪水", "學歷", "工作經驗", "行業標籤"]
    # csv_writer.writerow(headers)
    with open(savepath, 'w', encoding='utf-8', newline='') as f:
        f_csv = csv.writer(f)
        f_csv.writerow(headers)
        f_csv.writerows(datalist)
    # for i in range(0, 7):
    #     csv_writer.writerow(0, i, col[i])  # column headers
    # for i in range(len(datalist)):
    #     # print("第%d條" % (i + 1))  # debug output
    #     data = datalist[i]
    #     for j in range(0, 7):
    #         csv_writer.writerows(data[j])
    #         csv_writer.writerow(i + 1, j, data[j])  # data row
    # book.save(savepath)  # save

if __name__ == "__main__":  # entry point
    main()
    # init_db("movietest.db")
    print("爬取完畢!")
```

6:

```
from pyspider.libs.base_handler import *
import pymysql
import random
import time
import re

count = 0

class Handler(BaseHandler):
    # set request headers, otherwise the site answers with 403
    crawl_config = {'headers': {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}}

    def __init__(self):
        # connect to MySQL
        self.db = pymysql.connect(host='127.0.0.1', user='root', password='774110919', port=3306,
                                  db='boss_job', charset='utf8mb4')

    def add_Mysql(self, id, job_title, job_salary, job_city, job_experience, job_education,
                  company_name, company_type, company_status, company_people):
        # write one record to the database
        try:
            cursor = self.db.cursor()
            sql = 'insert into job(id, job_title, job_salary, job_city, job_experience, job_education, ' \
                  'company_name, company_type, company_status, company_people) ' \
                  'values ("%d", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s")' % (
                      id, job_title, job_salary, job_city, job_experience, job_education,
                      company_name, company_type, company_status, company_people)
            print(sql)
            cursor.execute(sql)
            print(cursor.lastrowid)
            self.db.commit()
        except Exception as e:
            print(e)
            self.db.rollback()

    @every(minutes=24 * 60)
    def on_start(self):
        # pyspider defaults to plain HTTP; for HTTPS requests add validate_cert=False,
        # otherwise a 599/SSL error is raised
        self.crawl('https://www.zhipin.com/job_detail/?query=%E6%95%B0%E6%8D%AE%E5%88%86%E6%9E%90&scity=100010000&industry=&position=',
                   callback=self.index_page, validate_cert=False)

    @config(age=10 * 24 * 60 * 60)
    def index_page(self, response):
        time.sleep(random.randint(2, 5))
        for i in response.doc('li > div').items():
            # global row counter
            global count
            count += 1
            # job title
            job_title = i('.job-title').text()
            print(job_title)
            # salary
            job_salary = i('.red').text()
            print(job_salary)
            # city
            city_result = re.search('(.*?)<em class=', i('.info-primary > p').html())
            job_city = city_result.group(1).split(' ')[0]
            print(job_city)
            # experience
            experience_result = re.search('<em class="vline"/>(.*?)<em class="vline"/>', i('.info-primary > p').html())
            job_experience = experience_result.group(1)
            print(job_experience)
            # education
            job_education = i('.info-primary > p').text().replace(' ', '') \
                .replace(city_result.group(1).replace(' ', ''), '') \
                .replace(experience_result.group(1).replace(' ', ''), '')
            print(job_education)
            # company name
            company_name = i('.info-company a').text()
            print(company_name)
            # company type
            company_type_result = re.search('(.*?)<em class=', i('.info-company p').html())
            company_type = company_type_result.group(1)
            print(company_type)
            # funding status
            company_status_result = re.search('<em class="vline"/>(.*?)<em class="vline"/>', i('.info-company p').html())
            if company_status_result:
                company_status = company_status_result.group(1)
            else:
                company_status = '無信息'
            print(company_status)
            # company size
            company_people = i('.info-company p').text().replace(company_type, '').replace(company_status, '')
            print(company_people + '\n')
            # write to the database
            self.add_Mysql(count, job_title, job_salary, job_city, job_experience, job_education,
                           company_name, company_type, company_status, company_people)
        # follow the next results page
        next = response.doc('.next').attr.href
        if next != 'javascript:;':
            self.crawl(next, callback=self.index_page, validate_cert=False)
        else:
            print("The Work is Done")
        # detail-page scraping, unused because of the request limit
        # for each in response.doc('.name > a').items():
        #     url = each.attr.href
        #     self.crawl(each.attr.href, callback=self.detail_page, validate_cert=False)

    @config(priority=2)
    def detail_page(self, response):
        # detail-page scraping, unused because of the request limit
        message_job = response.doc('div > .info-primary > p').text()
        city_result = re.findall('城市:(.*?)經驗', message_job)
        experience_result = re.findall('經驗:(.*?)學歷', message_job)
        education_result = re.findall('學歷:(.*)', message_job)
        message_company = response.doc('.info-company > p').text().replace(response.doc('.info-company > p > a').text(), '')
        status_result = re.findall('(.*?)\d', message_company.split(' ')[0])
        people_result = message_company.split(' ')[0].replace(status_result[0], '')
        return {
            "job_title": response.doc('h1').text(),
            "job_salary": response.doc('.info-primary .badge').text(),
            "job_city": city_result[0],
            "job_experience": experience_result[0],
            "job_education": education_result[0],
            "job_skills": response.doc('.info-primary > .job-tags > span').text(),
            "job_detail": response.doc('div').filter('.text').eq(0).text().replace('\n', ''),
            "company_name": response.doc('.info-company > .name > a').text(),
            "company_status": status_result[0],
            "company_people": people_result,
            "company_type": response.doc('.info-company > p > a').text(),
        }
```

*****

Data visualization:

```
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.pylab import mpl
import matplotlib.font_manager as fm

dfs = pd.read_csv('boss直聘爬蟲崗1217135602.csv', encoding='utf-8')
data_df = pd.DataFrame(dfs)
# df['prince'].fillna(df['prince'].mean())
# print("\n查看是否有缺失值\n", data_df.isnull().sum())  # check for missing values
data_df_del_empty = data_df.dropna(subset=['職位'], axis=0)  # drop rows with an empty job title
# keep only the rows whose job title mentions 爬蟲/python
data_df_python_keyword = data_df_del_empty.loc[data_df_del_empty['職位'].str.contains('爬蟲|python|Python')]
# print(data_df_python_keyword)
# lower bound of the salary range
data_df_python_keyword_salary = data_df_python_keyword['薪水'].str.split('-', expand=True)[0] + 'K'
# print(data_df_python_keyword_salary)
# insert the minimum salary as a new column (column index 7)
data_df_python_keyword.insert(7, '最小薪資', data_df_python_keyword_salary)
# print(data_df_python_keyword['學歷'])
Fre_f = pd.DataFrame(dfs["學歷"].value_counts())           # education frequency table
Fre_x = data_df_python_keyword["最小薪資"].value_counts()  # minimum-salary frequency table

def Bar_1(data, title, is_a):
    # set a Chinese-capable font globally
    my_font = fm.FontProperties(fname="C:/Windows/Fonts/msyhl.ttc")
    mpl.rcParams['font.sans-serif'] = my_font.get_name()
    mpl.rcParams["axes.unicode_minus"] = False
    # bar chart: set the figure size
    p = plt.figure(figsize=(20, 8), dpi=300)
    ax = p.add_subplot(1, 1, 1)  # one subplot on a 1x1 grid
    # hide the top and right spines
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    x = list(data.index)
    if is_a == 1:
        y = list(data)
    else:
        y = list(data['學歷'])
    plt.bar(range(len(x)), y, color="#4CAF50", width=0.5)
    plt.xticks(range(len(x)), x, font=my_font, fontsize=8, color="red")
    plt.yticks(font=my_font, fontsize=8, color="#006400")
    # background grid lines
    plt.grid(axis="y", linestyle="--", color="#FFA500", alpha=0.5)
    # annotate each bar: arguments are the text, the data point and the text position
    color_list = ["#4B0082", "#2F4F4F", "#32CD32", "#808000", "#B22222", "#808000"]
    # plt.show()
    # print(y[0])
    # return
    for i in range(len(y)):
        plt.annotate(y[i], xy=(i, y[i]), xytext=(i, y[i]), font=my_font, fontsize=8, color="#808000")
    # axis labels
    plt.xlabel("\n學歷", font=my_font, fontsize=20)
    plt.ylabel("招聘人數\n", font=my_font, fontsize=20)
    # title
    plt.title(title, font=my_font, fontsize=15, color="#FFD700")
    # plt.savefig("C:/Users/HUAWEI/Desktop/大數據就業與學歷關系直方圖.png")
    plt.show()

def Bie_1(data, title, is_a):
    plt.figure(figsize=(10, 5), dpi=150)  # figure size
    x = list(data.index)
    if is_a == 1:
        y = list(data)
    else:
        y = list(data['學歷'])
    labels = x  # slice labels
    sizes = y   # slice values
    colors = ['red', 'yellowgreen', 'lightskyblue', 'yellow', 'blue']  # slice colours (defined but not passed to pie)
    explode = (0.1, 0.05, 0.05, 0.05, 1.2)  # offsets for exploding slices (bigger value = bigger gap; also unused)
    patches, text1, text2 = plt.pie(sizes,
                                    labels=labels,
                                    autopct='%3.2f%%',  # percentage format
                                    shadow=False,       # no shadow
                                    startangle=0,       # start angle (counter-clockwise)
                                    pctdistance=0.6)    # distance of the percentage labels from the centre
    # patches are the wedges, text1 the outer labels, text2 the inner percentage texts
    plt.axis('equal')  # equal axes keep the pie circular
    # legend
    my_font1 = fm.FontProperties(fname="C:/Windows/Fonts/msyhl.ttc", size=10)
    plt.legend(prop=my_font1)
    # title
    my_font2 = fm.FontProperties(fname="C:/Windows/Fonts/msyhl.ttc", size=20)
    plt.title(title, font=my_font2, color='#32CD32')
    plt.show()

Bie_1(Fre_f, "爬蟲就業與學歷關系", 0)
# Bar_1(Fre_x, "爬蟲就業與學歷關系", 1)

# annotate adds text annotations, e.g. the count above each bar
# bar draws the bar chart itself
# value_counts() lists the distinct values in a column and how many times each one appears
```
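The comments in the first script describe what `re.S` changes; the snippet below is a minimal, self-contained illustration of that behaviour (the sample HTML string is made up for the example):

```
import re

html = '<div class="info-desc">五險一金\n帶薪年假</div>'

# Without re.S, "." does not match "\n", so the pattern cannot span the line break:
print(re.findall(r'<div class="info-desc">(.*)</div>', html))        # []

# With re.S the whole string is matched as one block and "\n" is just another character:
print(re.findall(r'<div class="info-desc">(.*)</div>', html, re.S))  # ['五險一金\n帶薪年假']
```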
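Script 6 builds its INSERT statement with the `%` operator. An alternative is a parameterized query, which lets pymysql do the quoting and escaping itself. This is only a sketch, assuming the same `job` table and connection settings as script 6; the sample row values are placeholders:

```
import pymysql

db = pymysql.connect(host='127.0.0.1', user='root', password='774110919',
                     port=3306, db='boss_job', charset='utf8mb4')

sql = ('insert into job (id, job_title, job_salary, job_city, job_experience, job_education, '
       'company_name, company_type, company_status, company_people) '
       'values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)')

# placeholder values, for illustration only
row = (1, '爬蟲工程師', '15-25K', '上海', '1-3年', '本科', '某公司', '互聯網', '已上市', '100-499人')

with db.cursor() as cursor:
    cursor.execute(sql, row)  # pymysql fills the %s placeholders and escapes the values
db.commit()
db.close()
```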
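In the visualization script the 最小薪資 values remain strings such as "15K", so they order alphabetically rather than numerically. A small sketch, using made-up salary strings in the same "low-high" format as the scraped 薪水 column, of converting the lower bound to an integer so the distribution can be sorted by salary:

```
import pandas as pd

# made-up sample values in the same format as the scraped 薪水 column
salaries = pd.Series(['15-25K', '8-13K', '20-40K·14薪', '15-30K'])

# take the part before the '-' and convert it to an integer
min_salary = salaries.str.split('-', expand=True)[0].astype(int)

# now the frequency table can be ordered by salary instead of alphabetically
print(min_salary.value_counts().sort_index())
```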