## 如何把采集結果存入mysql
<http://www.jishubu.net/yunwei/python/424.html>
pyspider是個非常強大簡單易用的爬蟲框架,但是默認軟件會把采集的所有字段打包保存到默認的數據庫中,和其它軟件沒法整合。現在需求是需要把采集的字段做為單獨的字段保存到自定義的mysql數據庫中,本人技術能力有限,個人感覺實現方法不是最優的,大家有能力的請自行改進,沒能力的湊合著用吧。或是直接下載py腳本:把 pyspider的結果存入自定義的mysql數據庫中[mysqldb.zip](http://www.jishubu.net/wp-content/plugins/wp-ueditor/ueditor/php/upload/8521423797887.zip)
~~~
'''
pyspider結果保存到數據庫簡單樣例。
使用方法:
    1,把本文件放到pyspider/pyspider/database/mysql/目錄下命名為mysqldb.py。
    2,修改本文件的數據庫配置參數及建立相應的表和庫。
    3,在腳本文件里使用from pyspider.database.mysql.mysqldb import SQL引用本代碼.
    4,重寫on_result方法,實例化sql并調用replace(replace方法參數第一個是表名,第二個是結果。)。簡單例子如下:
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# Created on 2015-01-26 13:12:04
# Project: jishubu.net
????
from pyspider.libs.base_handler import *
from pyspider.database.mysql.mysqldb import SQL
????
????
class Handler(BaseHandler):
    """Example pyspider project that writes each result into MySQL.

    Results are not left to the default result handling; ``on_result``
    is overridden to pass every scraped row to ``SQL.replace``.
    """

    crawl_config = {
    }

    @every(minutes=24 * 60)
    def on_start(self):
        """Schedule the site front page; re-run every 24 hours."""
        self.crawl('http://www.jishubu.net/', callback=self.index_page)

    @config(age=10 * 24 * 60 * 60)
    def index_page(self, response):
        """Queue every matching link on the index page for ``detail_page``."""
        for each in response.doc('p.pic a[href^="http"]').items():
            # Only printing the link would leave detail_page unscheduled,
            # so on_result would never receive a row to store.
            self.crawl(each.attr.href, callback=self.detail_page)

    @config(priority=2)
    def detail_page(self, response):
        """Return the fields to store; each key becomes a column name."""
        return {
            "url": response.url,
            "title": response.doc('HTML>BODY#presc>DIV.main>DIV.prices_box.wid980.clearfix>DIV.detail_box>DL.assort.tongyong>DD>A').text(),
        }

    def on_result(self, result):
        """Store ``result`` in the ``info`` table, skipping rows without a title."""
        # .get() keeps a result that lacks the key from raising KeyError.
        if not result or not result.get('title'):
            return
        sql = SQL()
        # First argument is the table name; the result dict supplies the columns.
        sql.replace('info', **result)
'''
from datetime import date, datetime, timedelta

import mysql.connector
from mysql.connector import errorcode
from six import itervalues
????
class SQL(object):
    """Small MySQL writer that stores pyspider results as table rows.

    Connection settings are class attributes; edit them before the first
    instance is created. The open connection is kept on the class
    (``SQL.connection``) and shared by every instance.
    """

    username = 'pyspider'   # database user name
    password = 'pyspider'   # database password
    database = 'result'     # database name; None connects without selecting one
    host = 'localhost'      # database host address
    connection = None       # shared connection; None (or '') means not connected
    # A class attribute named ``connect`` would be shadowed by the connect()
    # method defined below, so the auto-connect switch has its own name.
    auto_connect = True
    placeholder = '%s'      # parameter marker used in the VALUES list

    def __init__(self):
        """Connect on construction unless a live shared connection exists."""
        # Reusing the shared connection avoids opening (and leaking) a new
        # one for every SQL() instance, e.g. one per crawled result.
        if self.auto_connect and not self._has_live_connection():
            self.connect()

    def _has_live_connection(self):
        """Return True when ``SQL.connection`` is set and reports connected."""
        conn = SQL.connection
        if conn is None or conn == '':
            return False
        return bool(conn.is_connected())

    def escape(self, string):
        """Quote ``string`` as a MySQL identifier.

        Backticks inside the name are doubled so the name cannot terminate
        the quoted identifier early.
        """
        return '`%s`' % ('%s' % (string,)).replace('`', '``')

    def connect(self):
        """Open a connection and store it on ``SQL.connection``.

        Returns:
            True when the connection was opened; False otherwise, after
            printing the reason.
        """
        config = {
            'user': SQL.username,
            'password': SQL.password,
            'host': SQL.host,
        }
        if SQL.database is not None:
            config['database'] = SQL.database

        try:
            SQL.connection = mysql.connector.connect(**config)
        except mysql.connector.Error as err:
            if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
                print("The credentials you provided are not correct.")
            elif err.errno == errorcode.ER_BAD_DB_ERROR:
                print("The database you provided does not exist.")
            else:
                print("Something went wrong: %s" % (err,))
            return False
        return True

    def replace(self, tablename=None, **values):
        """Run ``REPLACE INTO tablename`` with ``values`` as column=value pairs.

        Args:
            tablename: name of the target table.
            **values: column names mapped to the values to store. With no
                values, a row made entirely of column defaults is written.

        Returns:
            True when the statement was executed and committed; False when
            there is no connection, no table name, or the server reported
            an error (the reason is printed).
        """
        if SQL.connection is None or SQL.connection == '':
            print("Please connect first")
            return False
        if not tablename:
            print("Please provide a table name")
            return False

        table = self.escape(tablename)
        # Fix the column order once so names and parameters always line up.
        columns = list(values)
        if columns:
            column_sql = ", ".join(self.escape(name) for name in columns)
            marker_sql = ", ".join([self.placeholder] * len(columns))
            sql_query = "REPLACE INTO %s (%s) VALUES (%s)" % (
                table, column_sql, marker_sql)
            params = [values[name] for name in columns]
        else:
            # MySQL has no "DEFAULT VALUES" clause; an empty column list
            # with an empty VALUES list is its all-defaults form.
            sql_query = "REPLACE INTO %s () VALUES ()" % table
            params = None

        cur = SQL.connection.cursor()
        try:
            if params is None:
                cur.execute(sql_query)
            else:
                cur.execute(sql_query, params)
            SQL.connection.commit()
            return True
        except mysql.connector.Error as err:
            print("An error occured: {}".format(err))
            return False
        finally:
            # Always release the cursor, whether or not the statement worked.
            cur.close()
~~~
## 報錯:No module named mysqldb
出現此錯誤時,請先確認已安裝 MySQL Connector/Python(即代碼中引用的 `mysql.connector`),下載地址:`http://ftp.ntu.edu.tw/MySQL/Downloads/Connector-Python/`