5.sizzle后續 · jQuery源碼構架

[toc] ## select 函數前面已經介紹了 tokenize 函數的功能，已經生成了一個 tokens 數組，而且對它的組成我們也做了介紹，下面就是介紹對這個 tokens 數組如何處理。 DOM 元素之間的連接關系大概有 > + ~ 幾種，包括空格，而 tokens 數組中的 type 是有 tag、attr 和連接符之分的，區分它們 Sizzle 也是有一套規則的，比如上一章我們所講的 Expr 對象，它真的非常重要： ``` Expr.relative = { ">": { dir: "parentNode", first: true }, " ": { dir: "parentNode" }, "+": { dir: "previousSibling", first: true }, "~": { dir: "previousSibling" } }; ``` Expr.relative 標記用來將連接符區分，對其種類又根據查找方向（dir）進行劃分。現在我們再來理一理 tokens 數組，這個數組目前是一個多重數組，現在不考慮逗號的情況，暫定只有一個分支。如果我們使用從右向左的匹配方式的話，div > div.seq h2 ~ p，會先得到 type 為 TAG 的 token，而對于 type 為 ~ 的 token 我們已經可以用 relative 對象來判斷，現在來介紹 Expr.find 對象： ``` Expr.find = {}; Expr.find['ID'] = function( id, context ) { if ( typeof context.getElementById !== "undefined" && documentIsHTML ) { var elem = context.getElementById( id ); return elem ? [ elem ] : []; } }; Expr.find["CLASS"] = support.getElementsByClassName && function( className, context ) { if ( typeof context.getElementsByClassName !== "undefined" && documentIsHTML ) { return context.getElementsByClassName( className ); } }; Expr.find["TAG"] = function(){...}; ``` 實際上 jQuery 的源碼還考慮到了兼容性，這里以 `find["ID"]` 介紹： ``` if(support.getById){ Expr.find['ID'] = function(){...}; // 上面 }else{ // 兼容 IE 6、7 Expr.find["ID"] = function( id, context ) { if ( typeof context.getElementById !== "undefined" && documentIsHTML ) { var node, i, elems, elem = context.getElementById( id ); if ( elem ) { // Verify the id attribute node = elem.getAttributeNode("id"); if ( node && node.value === id ) { return [ elem ]; } // Fall back on getElementsByName elems = context.getElementsByName( id ); i = 0; while ( (elem = elems[i++]) ) { node = elem.getAttributeNode("id"); if ( node && node.value === id ) { return [ elem ]; } } } return []; } }; } ``` 可以對 find 對象進行簡化： ``` Expr.find = { "ID": document.getElementById, "CLASS": document.getElementsByClassName, "TAG": document.getElementsByTagName } ``` 以后還會介紹 Expr.filter。 ## select 源碼源碼之前，來看幾個正則表達式。 ``` var runescape = 
/\\([\da-f]{1,6}[\x20\t\r\n\f]?|([\x20\t\r\n\f])|.)/gi //這個正則是用來對轉義字符特殊處理，帶個反斜杠的 token runescape.exec('\\ab'); //["\ab", "ab", undefined] var rsibling = /[+~]/; //匹配 +、~ matchExpr['needsContext'] = /^[\x20\t\r\n\f]*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\([\x20\t\r\n\f]*((?:-\d)?\d*)[\x20\t\r\n\f]*\)|)(?=[^-]|$)/i //needsContext 用來匹配不完整的 selector matchExpr['needsContext'].test(' + p')//true matchExpr['needsContext'].test(':first p')//true //這個不完整，可能是由于抽調 #ID 導致的 ``` 而對于 runescape 正則，往往都是配合 replace 來使用： ``` var str = '\\ab'; str.replace(runescape, funescape); var funescape = function (_, escaped, escapedWhitespace) { var high = "0x" + escaped - 0x10000; // NaN means non-codepoint // Support: Firefox<24 // Workaround erroneous numeric interpretation of +"0x" return high !== high || escapedWhitespace ? escaped : high < 0 ? // BMP codepoint String.fromCharCode(high + 0x10000) : // Supplemental Plane codepoint (surrogate pair) String.fromCharCode(high >> 10 | 0xD800, high & 0x3FF | 0xDC00); } ``` 我完全看不懂啦，你們自己意會去吧，O(∩_∩)O哈哈~ ``` var select = Sizzle.select = function (selector, context, results, seed) { var i, tokens, token, type, find, compiled = typeof selector === "function" && selector, match = !seed && tokenize((selector = compiled.selector || selector)); results = results || []; // 長度為 1，即表示沒有逗號，Sizzle 嘗試對此情況優化 if (match.length === 1) { tokens = match[0] = match[0].slice(0); // 第一個 TAG 為一個 ID 選擇器，設置快速查找 if (tokens.length > 2 && (token = tokens[0]).type === "ID" && context.nodeType === 9 && documentIsHTML && Expr.relative[tokens[1].type]) { //將新 context 設置成那個 ID context = (Expr.find["ID"](token.matches[0].replace(runescape, funescape), context) || [])[0]; if (!context) { // 第一個 ID 都找不到就直接返回 return results; // 此時 selector 為 function，應該有特殊用途 } else if (compiled) { context = context.parentNode; } selector = selector.slice(tokens.shift().value.length); } // 在沒有 CHILD 的情況，從右向左，仍然是對性能的優化 i = matchExpr["needsContext"].test(selector) ? 
0 : tokens.length; while (i--) { token = tokens[i]; // 碰到 +~ 等符號先停止 if (Expr.relative[(type = token.type)]) { break; } if ((find = Expr.find[type])) { // Search, expanding context for leading sibling combinators if ((seed = find( token.matches[0].replace(runescape, funescape), rsibling.test(tokens[0].type) && testContext(context.parentNode) || context))) { // testContext 是判斷 getElementsByTagName 是否存在 // If seed is empty or no tokens remain, we can return early tokens.splice(i, 1); selector = seed.length && toSelector(tokens); //selector 為空，表示到頭，直接返回 if (!selector) { push.apply(results, seed); return results; } break; } } } } // Compile and execute a filtering function if one is not provided // Provide `match` to avoid retokenization if we modified the selector above (compiled || compile(selector, match))( seed, context, !documentIsHTML, results, !context || rsibling.test(selector) && testContext(context.parentNode) || context); return results; } ``` toSelector 函數是將 tokens 除去已經選擇的將剩下的拼接成字符串： ``` function toSelector(tokens) { var i = 0, len = tokens.length, selector = ""; for (; i < len; i++) { selector += tokens[i].value; } return selector; } ``` 在最后又多出一個 compile 函數，是 Sizzle 的編譯函數，下章講。到目前為止，該優化的都已經優化了，selector 和 context，還有 seed，而且如果執行到 compile 函數，這幾個變量的狀態： 1. selector 可能已經不是最初那個，經過各種去頭去尾； 2. match 沒變，仍是 tokenize 的結果； 3. seed 是種子集合，所有等待匹配 DOM 的集合； 4. context 可能已經是頭（#ID）； 5. results 沒變。可能，你也發現了，其實 compile 是一個返回函數的高階函數，所以調用形式是 compile()()。 ## 總結 select 大概干了幾件事， 1. 將 tokenize 處理 selector 的結果賦給 match，所以 match 實為 tokens 數組； 2. 在長度為 1，且第一個 token 為 ID 的情況下，對 context 進行優化，把 ID 匹配到的元素賦給 context； 3. 若 selector 不匹配 needsContext 正則，則生成一個 seed 集合，為所有的最右 DOM 集合； 4. 最后是 compile 函數，參數真多...