utils/node_modules/@microsoft/tsdoc/lib-commonjs/parser/Tokenizer.js

146 lines
6.1 KiB
JavaScript
Raw Normal View History

2024-02-07 01:33:07 -05:00
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.Tokenizer = void 0;
var TextRange_1 = require("./TextRange");
var Token_1 = require("./Token");
var Tokenizer = /** @class */ (function () {
    function Tokenizer() {
    }
    /**
     * Tokenizes every entry of `lines`, in order, into a single flat array of tokens.
     * The tokens of each line are followed by a Newline token, and the returned array
     * always finishes with exactly one TokenKind.EndOfInput token.
     */
    Tokenizer.readTokens = function (lines) {
        Tokenizer._ensureInitialized();
        var collected = [];
        var finalLine = undefined;
        for (var lineIndex = 0; lineIndex < lines.length; lineIndex++) {
            var currentLine = lines[lineIndex];
            Tokenizer._pushTokensForLine(collected, currentLine);
            finalLine = currentLine;
        }
        if (!finalLine) {
            // Nothing was tokenized, so the end marker is anchored to the shared empty range.
            collected.push(new Token_1.Token(Token_1.TokenKind.EndOfInput, TextRange_1.TextRange.empty, TextRange_1.TextRange.empty));
            return collected;
        }
        // The end marker is a zero-length range positioned at the end of the last line.
        collected.push(new Token_1.Token(Token_1.TokenKind.EndOfInput, finalLine.getNewRange(finalLine.end, finalLine.end), finalLine));
        return collected;
    };
    /**
     * Reports whether `tokenKind` is one of the punctuation token kinds, i.e.
     * TokenKind.OtherPunctuation or any kind assigned to a special symbol.
     * Returns false for every other kind.
     */
    Tokenizer.isPunctuation = function (tokenKind) {
        Tokenizer._ensureInitialized();
        var registered = Tokenizer._punctuationTokens[tokenKind];
        return registered ? registered : false;
    };
    /**
     * Scans the characters of `line` between `line.pos` and `line.end`, appending one token
     * per run to `tokens`, and then appends a zero-length Newline token for the line.
     */
    Tokenizer._pushTokensForLine = function (tokens, line) {
        var text = line.buffer;
        var limit = line.end;
        var cursor = line.pos;
        // Kind and start offset of the token currently being accumulated (if any).
        var pendingKind = undefined;
        var pendingStart = cursor;
        // Emits the accumulated token, if there is one, ending just before `stopIndex`.
        var flushPending = function (stopIndex) {
            if (pendingKind !== undefined) {
                tokens.push(new Token_1.Token(pendingKind, line.getNewRange(pendingStart, stopIndex), line));
            }
        };
        for (; cursor < limit; ++cursor) {
            // Classify the character; anything without a table entry is TokenKind.Other.
            var code = text.charCodeAt(cursor);
            var mappedKind = Tokenizer._charCodeMap[code];
            var kindHere = mappedKind === undefined ? Token_1.TokenKind.Other : mappedKind;
            // A character extends the pending token only when a token is pending, the kinds
            // match, and that kind is allowed to span several characters (punctuation is not).
            var continuesPending = pendingKind !== undefined &&
                kindHere === pendingKind &&
                Tokenizer._isMultiCharacterToken(pendingKind);
            if (continuesPending) {
                continue;
            }
            // Otherwise close the pending token and start a new one at this character.
            flushPending(cursor);
            pendingStart = cursor;
            pendingKind = kindHere;
        }
        // Close whatever token was still open when the line ran out.
        flushPending(cursor);
        tokens.push(new Token_1.Token(Token_1.TokenKind.Newline, line.getNewRange(line.end, line.end), line));
    };
    /**
     * Reports whether a token of the given kind may span more than one character.
     * Only Spacing, AsciiWord and Other qualify.
     */
    Tokenizer._isMultiCharacterToken = function (kind) {
        return kind === Token_1.TokenKind.Spacing ||
            kind === Token_1.TokenKind.AsciiWord ||
            kind === Token_1.TokenKind.Other;
    };
    /**
     * Builds the two static lookup tables on first use; later calls return immediately.
     *  - `_charCodeMap` maps a character code to its token kind.
     *  - `_punctuationTokens` flags the token kinds that count as punctuation.
     */
    Tokenizer._ensureInitialized = function () {
        if (Tokenizer._charCodeMap) {
            return;
        }
        var charKinds = {};
        var punctuationKinds = {};
        Tokenizer._charCodeMap = charKinds;
        Tokenizer._punctuationTokens = punctuationKinds;
        // Assigns `kind` to every character that occurs in `characters`.
        var mapCharacters = function (characters, kind) {
            for (var index = 0; index < characters.length; ++index) {
                charKinds[characters.charCodeAt(index)] = kind;
            }
        };
        // Start by treating every listed punctuation character as generic punctuation...
        mapCharacters(Tokenizer._commonMarkPunctuationCharacters, Token_1.TokenKind.OtherPunctuation);
        // ...then give the symbols below their own dedicated kinds, overriding the generic entry.
        var specialPairs = [
            ['\\', Token_1.TokenKind.Backslash],
            ['<', Token_1.TokenKind.LessThan],
            ['>', Token_1.TokenKind.GreaterThan],
            ['=', Token_1.TokenKind.Equals],
            ["'", Token_1.TokenKind.SingleQuote],
            ['"', Token_1.TokenKind.DoubleQuote],
            ['/', Token_1.TokenKind.Slash],
            ['-', Token_1.TokenKind.Hyphen],
            ['@', Token_1.TokenKind.AtSign],
            ['{', Token_1.TokenKind.LeftCurlyBracket],
            ['}', Token_1.TokenKind.RightCurlyBracket],
            ['`', Token_1.TokenKind.Backtick],
            ['.', Token_1.TokenKind.Period],
            [':', Token_1.TokenKind.Colon],
            [',', Token_1.TokenKind.Comma],
            ['[', Token_1.TokenKind.LeftSquareBracket],
            [']', Token_1.TokenKind.RightSquareBracket],
            ['|', Token_1.TokenKind.Pipe],
            ['(', Token_1.TokenKind.LeftParenthesis],
            [')', Token_1.TokenKind.RightParenthesis],
            ['#', Token_1.TokenKind.PoundSymbol],
            ['+', Token_1.TokenKind.Plus],
            ['$', Token_1.TokenKind.DollarSign]
        ];
        for (var pairIndex = 0; pairIndex < specialPairs.length; pairIndex++) {
            var symbol = specialPairs[pairIndex][0];
            var symbolKind = specialPairs[pairIndex][1];
            charKinds[symbol.charCodeAt(0)] = symbolKind;
            // Every dedicated symbol kind also counts as punctuation.
            punctuationKinds[symbolKind] = true;
        }
        punctuationKinds[Token_1.TokenKind.OtherPunctuation] = true;
        // Letters, digits and underscore form words.
        mapCharacters(Tokenizer._wordCharacters, Token_1.TokenKind.AsciiWord);
        // Space and tab are the only spacing characters.
        charKinds[' '.charCodeAt(0)] = Token_1.TokenKind.Spacing;
        charKinds['\t'.charCodeAt(0)] = Token_1.TokenKind.Spacing;
    };
    // Characters initially classified as punctuation. Note that '_' does not appear here;
    // it is listed in _wordCharacters instead.
    Tokenizer._commonMarkPunctuationCharacters = '!"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~';
    // Characters classified as TokenKind.AsciiWord.
    Tokenizer._wordCharacters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_';
    return Tokenizer;
}());
// Publish the Tokenizer class as this CommonJS module's `Tokenizer` export.
exports.Tokenizer = Tokenizer;
//# sourceMappingURL=Tokenizer.js.map