580 lines
21 KiB
JavaScript
580 lines
21 KiB
JavaScript
/******************************************************************************
|
|
* Copyright 2021-2022 TypeFox GmbH
|
|
* This program and the accompanying materials are made available under the
|
|
* terms of the MIT License, which is available in the project root.
|
|
******************************************************************************/
|
|
import { assertUnreachable } from '../utils/errors.js';
|
|
import * as ast from '../languages/generated/ast.js';
|
|
import { isCompositeCstNode } from '../syntax-tree.js';
|
|
import { getContainerOfType, streamAllContents } from './ast-utils.js';
|
|
import { streamCst } from './cst-utils.js';
|
|
import { escapeRegExp, isWhitespace } from './regexp-utils.js';
|
|
/**
 * Looks up the entry rule of the given grammar.
 *
 * @param grammar The grammar whose `rules` are searched.
 * @returns The first parser rule whose `entry` flag is set, or `undefined` if there is none.
 */
export function getEntryRule(grammar) {
    const isEntryParserRule = rule => ast.isParserRule(rule) && rule.entry;
    return grammar.rules.find(isEntryParserRule);
}
|
|
/**
 * Collects the hidden terminal rules of the given grammar.
 *
 * @param grammar The grammar whose `rules` are searched.
 * @returns An array with every terminal rule whose `hidden` flag is set; empty if there is none.
 */
export function getHiddenRules(grammar) {
    const isHiddenTerminal = rule => ast.isTerminalRule(rule) && rule.hidden;
    return grammar.rules.filter(isHiddenTerminal);
}
|
|
/**
 * Computes the rules reachable from the topmost rules of a grammar, i.e. from its entry rule
 * and its hidden terminal rules.
 *
 * @param grammar The grammar that contains all rules
 * @param allTerminals Whether or not terminals that are only referenced by other terminals are followed as well
 * @returns A set of the reachable parser and terminal rules, in grammar order. Hidden terminal rules
 * are always part of the result. If the grammar has no entry rule, all of its rules are returned.
 */
export function getAllReachableRules(grammar, allTerminals) {
    const entryRule = getEntryRule(grammar);
    if (!entryRule) {
        return new Set(grammar.rules);
    }
    // Collect the names of everything reachable from the entry rule and the hidden terminals.
    const reachableNames = new Set();
    [entryRule, ...getHiddenRules(grammar)].forEach(root => {
        ruleDfs(root, reachableNames, allTerminals);
    });
    // Map the names back to rule objects, preserving the order of `grammar.rules`.
    const isReachable = rule => reachableNames.has(rule.name) || (ast.isTerminalRule(rule) && rule.hidden);
    return new Set(grammar.rules.filter(isReachable));
}
|
|
/**
 * Depth-first traversal over rule calls that records the name of every visited rule.
 *
 * @param rule The rule to start from; its name is added to `visitedSet` immediately.
 * @param visitedSet The set of rule names seen so far; it is mutated by this function.
 * @param allTerminals If truthy, terminal rule calls are followed in addition to regular rule calls.
 */
function ruleDfs(rule, visitedSet, allTerminals) {
    visitedSet.add(rule.name);
    streamAllContents(rule).forEach(node => {
        const isFollowedCall = ast.isRuleCall(node) || (allTerminals && ast.isTerminalRuleCall(node));
        if (!isFollowedCall) {
            return;
        }
        const target = node.rule.ref;
        // Unresolved references are skipped; the name check prevents endless recursion on cycles.
        if (target && !visitedSet.has(target.name)) {
            ruleDfs(target, visitedSet, allTerminals);
        }
    });
}
|
|
/**
 * Determines the grammar expression used to parse a cross-reference.
 *
 * If the cross-reference declares a `terminal` itself, that one is returned. Otherwise, if the
 * referenced type is resolved, the terminal of that type's name assignment (as found by
 * `findNameAssignment`) is used.
 *
 * @param crossRef The cross-reference to inspect.
 * @returns The grammar expression used to parse the cross-reference, or `undefined` if it is
 * neither declared nor inferable.
 */
export function getCrossReferenceTerminal(crossRef) {
    if (crossRef.terminal) {
        return crossRef.terminal;
    }
    const referencedType = crossRef.type.ref;
    if (!referencedType) {
        return undefined;
    }
    const nameAssignment = findNameAssignment(referencedType);
    return nameAssignment == null ? undefined : nameAssignment.terminal;
}
|
|
/**
 * Determines whether the given terminal rule represents a comment.
 *
 * A rule counts as a comment if it is marked as `hidden` and the regular expression built from it
 * is not classified as white space by `isWhitespace`.
 *
 * @param terminalRule The terminal rule to check.
 * @returns The (falsy) `hidden` value for non-hidden rules, otherwise whether the rule does not match white space.
 */
export function isCommentTerminal(terminalRule) {
    const hidden = terminalRule.hidden;
    if (!hidden) {
        return hidden;
    }
    return !isWhitespace(terminalRegex(terminalRule));
}
|
|
/**
 * Finds all CST nodes within the given node that contribute to the specified property.
 *
 * @param node A CST node in which to look for property assignments. If it is falsy, the result is an empty array.
 * @param property A property name of the constructed AST node. If it is falsy, the result is an empty array.
 * @returns The matching CST nodes.
 */
export function findNodesForProperty(node, property) {
    return node && property
        ? findNodesForPropertyInternal(node, property, node.astNode, true)
        : [];
}
|
|
/**
 * Finds a single CST node within the given node that contributes to the specified property.
 *
 * @param node A CST node in which to look for property assignments. If it is falsy, the result is `undefined`.
 * @param property A property name of the constructed AST node. If it is falsy, the result is `undefined`.
 * @param index Selects one of the found nodes. Without an index, or with an index below zero, the first
 * found node is returned. An index beyond the last found node yields the last found node.
 * @returns The selected CST node, or `undefined` if no node contributes to the property.
 */
export function findNodeForProperty(node, property, index) {
    if (!node || !property) {
        return undefined;
    }
    const matches = findNodesForPropertyInternal(node, property, node.astNode, true);
    if (matches.length === 0) {
        return undefined;
    }
    // Clamp the requested index into the valid range [0, matches.length - 1].
    const position = index === undefined
        ? 0
        : Math.max(0, Math.min(index, matches.length - 1));
    return matches[position];
}
|
|
/**
 * Recursive worker behind `findNodesForProperty` and `findNodeForProperty`.
 *
 * @param node The CST node currently inspected.
 * @param property The property name to look for.
 * @param element The AST node the search is restricted to; composite nodes belonging to a different AST node are not descended into.
 * @param first Whether `node` is the root of the search. The root itself is never matched against the property.
 * @returns The CST nodes whose grammar source lies within an assignment to `property`.
 */
function findNodesForPropertyInternal(node, property, element, first) {
    if (!first) {
        const assignment = getContainerOfType(node.grammarSource, ast.isAssignment);
        if (assignment && assignment.feature === property) {
            return [node];
        }
    }
    if (!isCompositeCstNode(node) || node.astNode !== element) {
        return [];
    }
    return node.content.flatMap(child => findNodesForPropertyInternal(child, property, element, false));
}
|
|
/**
 * Finds all CST nodes within the given node that correspond to the specified keyword.
 *
 * @param node A CST node in which to look for keywords. If it is falsy, the result is an empty array.
 * @param keyword A keyword as specified in the grammar.
 * @returns The matching keyword CST nodes.
 */
export function findNodesForKeyword(node, keyword) {
    return node
        ? findNodesForKeywordInternal(node, keyword, node.astNode)
        : [];
}
|
|
/**
 * Finds a single CST node within the given node that corresponds to the specified keyword.
 *
 * @param node A CST node in which to look for keywords. If it is falsy, the result is `undefined`.
 * @param keyword A keyword as specified in the grammar.
 * @param index Selects one of the found nodes. Without an index, or with an index below zero, the first
 * found node is returned. An index beyond the last occurrence yields the last found node.
 * @returns The selected CST node, or `undefined` if the keyword does not occur.
 */
export function findNodeForKeyword(node, keyword, index) {
    if (!node) {
        return undefined;
    }
    const matches = findNodesForKeywordInternal(node, keyword, node.astNode);
    if (matches.length === 0) {
        return undefined;
    }
    // Clamp the requested index into the valid range [0, matches.length - 1].
    const position = index === undefined
        ? 0
        : Math.max(0, Math.min(index, matches.length - 1));
    return matches[position];
}
|
|
/**
 * Worker behind `findNodesForKeyword` and `findNodeForKeyword`.
 *
 * @param node The CST node in which to search.
 * @param keyword The keyword value to look for.
 * @param element The AST node the search is restricted to. Subtrees that belong to a different AST node are pruned.
 * @returns `[node]` if `node` itself is the keyword, otherwise all matching nodes yielded by `streamCst(node)`.
 */
export function findNodesForKeywordInternal(node, keyword, element) {
    if (node.astNode !== element) {
        return [];
    }
    const isRequestedKeyword = candidate => ast.isKeyword(candidate.grammarSource) && candidate.grammarSource.value === keyword;
    if (isRequestedKeyword(node)) {
        return [node];
    }
    const found = [];
    const iterator = streamCst(node).iterator();
    for (let step = iterator.next(); !step.done; step = iterator.next()) {
        const current = step.value;
        if (current.astNode !== element) {
            // This node belongs to another AST node: skip everything below it.
            iterator.prune();
        }
        else if (isRequestedKeyword(current)) {
            found.push(current);
        }
    }
    return found;
}
|
|
/**
 * Returns the `Assignment` grammar node in whose context the given CST node was parsed.
 *
 * @param cstNode A CST node for which to find a property assignment.
 * @returns The assignment found via the grammar source of the node or of one of its containers,
 * or `undefined` if no assignment is found.
 */
export function findAssignment(cstNode) {
    const astNode = cstNode.astNode;
    let current = cstNode;
    let parent = current.container;
    // Only walk upwards while the container still belongs to the original AST node. Going further
    // would jump to a preceding rule call, which contains only unrelated assignments.
    while (astNode === (parent == null ? undefined : parent.astNode)) {
        const assignment = getContainerOfType(current.grammarSource, ast.isAssignment);
        if (assignment) {
            return assignment;
        }
        current = parent;
        parent = current.container;
    }
    return undefined;
}
|
|
/**
 * Finds an assignment to the `name` property for the given grammar type.
 *
 * For an inferred type, the search does not start at the type itself: it starts at the container
 * of the declaring action, or at the declaring parser rule.
 *
 * @param type The grammar type for which to find the name assignment.
 * @returns The assignment to `name`, or `undefined` if none is found.
 */
export function findNameAssignment(type) {
    let startNode = type;
    if (ast.isInferredType(type)) {
        const declaringNode = type.$container;
        if (ast.isAction(declaringNode)) {
            // Type inferred by an action: inspect the siblings of the action, i.e. start at the action's parent.
            startNode = declaringNode.$container;
        }
        else if (ast.isParserRule(declaringNode)) {
            // Type inferred by a parser rule: inspect that rule.
            startNode = declaringNode;
        }
        else {
            assertUnreachable(declaringNode);
        }
    }
    return findNameAssignmentInternal(type, startNode, new Map());
}
|
|
/**
 * Recursive worker behind `findNameAssignment`.
 *
 * @param type The type for which the result is recorded in `cache`.
 * @param startNode The node whose contents are searched.
 * @param cache Results per type. It is only required to prevent infinite loops on cyclic grammars.
 * @returns The first assignment whose feature is `name` (compared case-insensitively), the result of
 * following the first parser rule call or resolved simple type reference, or `undefined`.
 */
function findNameAssignmentInternal(type, startNode, cache) {
    if (cache.has(type)) {
        return cache.get(type);
    }
    // Mark the type as being processed so that a cyclic reference terminates with `undefined`.
    cache.set(type, undefined);
    const followReference = (node, referencedType) => {
        const enclosingAssignment = getContainerOfType(node, ast.isAssignment);
        // No enclosing assignment implies an unassigned rule call; only those are followed.
        const result = enclosingAssignment
            ? undefined
            : findNameAssignmentInternal(referencedType, referencedType, cache);
        cache.set(type, result);
        return result;
    };
    for (const node of streamAllContents(startNode)) {
        if (ast.isAssignment(node) && node.feature.toLowerCase() === 'name') {
            cache.set(type, node);
            return node;
        }
        if (ast.isRuleCall(node) && ast.isParserRule(node.rule.ref)) {
            return followReference(node, node.rule.ref);
        }
        if (ast.isSimpleType(node) && node.typeRef != null && node.typeRef.ref) {
            return followReference(node, node.typeRef.ref);
        }
    }
    return undefined;
}
|
|
/**
 * Searches for the action that precedes the given grammar element.
 *
 * If the element is contained in a group, its preceding siblings are inspected from the nearest to the
 * farthest: a sibling that is an action, or the first action found inside a sibling, is returned.
 * If nothing is found, the search continues from the container as long as it is an abstract element.
 *
 * @param element The grammar element at which to start.
 * @returns The action found, or `undefined`.
 */
export function getActionAtElement(element) {
    const parent = element.$container;
    if (ast.isGroup(parent)) {
        const siblings = parent.elements;
        for (let position = siblings.indexOf(element) - 1; position >= 0; position -= 1) {
            const sibling = siblings[position];
            if (ast.isAction(sibling)) {
                return sibling;
            }
            const nestedAction = streamAllContents(sibling).find(ast.isAction);
            if (nestedAction) {
                return nestedAction;
            }
        }
    }
    return ast.isAbstractElement(parent) ? getActionAtElement(parent) : undefined;
}
|
|
/**
 * Determines whether an element is optional.
 *
 * @param cardinality The cardinality of the element.
 * @param element The element itself; only consulted if the cardinality is neither `?` nor `*`.
 * @returns `true` for the cardinalities `?` and `*`, and for a group that has a guard condition.
 */
export function isOptionalCardinality(cardinality, element) {
    if (cardinality === '?' || cardinality === '*') {
        return true;
    }
    return ast.isGroup(element) && Boolean(element.guardCondition);
}
|
|
/**
 * Determines whether the given cardinality allows multiple occurrences.
 *
 * @param cardinality The cardinality to check.
 * @returns `true` for `*` and `+`, `false` otherwise.
 */
export function isArrayCardinality(cardinality) {
    return ['*', '+'].includes(cardinality);
}
|
|
/**
 * Determines whether the given assignment operator is `+=`.
 *
 * @param operator The assignment operator to check.
 * @returns `true` for `+=`, `false` otherwise.
 */
export function isArrayOperator(operator) {
    const appendOperator = '+=';
    return operator === appendOperator;
}
|
|
/**
 * Determines whether the given parser rule is a _data type rule_, meaning that it has a
 * primitive return type like `number`, `boolean`, etc.
 *
 * @param rule The parser rule to check.
 * @returns The result of the recursive check, started with an empty set of visited rules.
 */
export function isDataTypeRule(rule) {
    const visited = new Set();
    return isDataTypeRuleInternal(rule, visited);
}
|
|
/**
 * Recursive worker behind `isDataTypeRule`.
 *
 * @param rule The rule to check.
 * @param visited The rules already being checked; a rule found in here is treated as a data type rule,
 * which terminates the recursion on cyclic rule calls. The set is mutated.
 * @returns `false` if the rule contains an assignment, an action, an unresolved rule call, or a call to a
 * parser rule that is not a data type rule. Otherwise, whether the rule has a definition.
 */
function isDataTypeRuleInternal(rule, visited) {
    if (visited.has(rule)) {
        return true;
    }
    visited.add(rule);
    for (const node of streamAllContents(rule)) {
        if (ast.isRuleCall(node)) {
            const target = node.rule.ref;
            if (!target) {
                // RuleCall to unresolved rule. Don't assume `rule` is a DataType rule.
                return false;
            }
            if (ast.isParserRule(target) && !isDataTypeRuleInternal(target, visited)) {
                return false;
            }
        }
        else if (ast.isAssignment(node) || ast.isAction(node)) {
            return false;
        }
    }
    return Boolean(rule.definition);
}
|
|
/**
 * Determines whether the given type declaration describes a data type.
 *
 * @param type The type declaration; its `type` property is checked.
 * @returns The result of the recursive check, started with an empty set of visited types.
 */
export function isDataType(type) {
    const visited = new Set();
    return isDataTypeInternal(type.type, visited);
}
|
|
/**
 * Recursive worker behind `isDataType`.
 *
 * @param type The type definition to check.
 * @param visited The type definitions already being checked; one found in here is treated as a data
 * type, which terminates the recursion on cyclic references. The set is mutated.
 * @returns `true` for simple types with a primitive type or a string type, for simple types referencing
 * a type declaration that is itself a data type, and for unions consisting only of data types.
 * `false` in every other case, in particular for array and reference types.
 */
function isDataTypeInternal(type, visited) {
    if (visited.has(type)) {
        return true;
    }
    visited.add(type);
    if (ast.isArrayType(type) || ast.isReferenceType(type)) {
        return false;
    }
    if (ast.isUnionType(type)) {
        return type.types.every(member => isDataTypeInternal(member, visited));
    }
    if (!ast.isSimpleType(type)) {
        return false;
    }
    if (type.primitiveType !== undefined || type.stringType !== undefined) {
        return true;
    }
    if (type.typeRef === undefined) {
        return false;
    }
    const target = type.typeRef.ref;
    return ast.isType(target) ? isDataTypeInternal(target.type, visited) : false;
}
|
|
/**
 * Returns the type name that the given rule declares explicitly.
 *
 * @param rule The rule to inspect.
 * @returns In order of precedence: the name of the rule's inferred type, its data type, or the name of
 * the resolved return type if that is a parser rule, an interface or a type. `undefined` otherwise.
 */
export function getExplicitRuleType(rule) {
    if (rule.inferredType) {
        return rule.inferredType.name;
    }
    if (rule.dataType) {
        return rule.dataType;
    }
    const declaredType = rule.returnType ? rule.returnType.ref : undefined;
    if (declaredType && (ast.isParserRule(declaredType) || ast.isInterface(declaredType) || ast.isType(declaredType))) {
        return declaredType.name;
    }
    return undefined;
}
|
|
/**
 * Returns the type name for the given grammar element.
 *
 * @param type A parser rule, interface, type, return type, action or inferred type.
 * @returns For parser rules: the rule name if it is a data type rule, otherwise its explicit type or,
 * failing that, the rule name. For actions: the action type. For all other supported elements: their `name`.
 * @throws Error if the element is of none of the supported kinds, or if it is an action without a type.
 */
export function getTypeName(type) {
    if (ast.isParserRule(type)) {
        if (isDataTypeRule(type)) {
            return type.name;
        }
        const explicitType = getExplicitRuleType(type);
        return explicitType == null ? type.name : explicitType;
    }
    if (ast.isInterface(type) || ast.isType(type) || ast.isReturnType(type)) {
        return type.name;
    }
    if (ast.isAction(type)) {
        const actionType = getActionType(type);
        if (actionType) {
            return actionType;
        }
    }
    else if (ast.isInferredType(type)) {
        return type.name;
    }
    throw new Error('Cannot get name of Unknown Type');
}
|
|
/**
 * Returns the name of the type created by the given action.
 *
 * @param action The action to inspect.
 * @returns The name of the action's inferred type, or the type name of the resolved type it references.
 * `undefined` if the action neither infers a type nor references a valid one.
 */
export function getActionType(action) {
    if (action.inferredType) {
        return action.inferredType.name;
    }
    const declaredType = action.type;
    if (declaredType != null && declaredType.ref) {
        return getTypeName(action.type.ref);
    }
    return undefined;
}
|
|
/**
 * This function is used at development time (for code generation and the internal type system) to get
 * the type of the AST node produced by the given rule.
 * For data type rules, the name of the rule is returned,
 * e.g. "INT_value returns number: MY_INT;" returns "INT_value".
 * For terminal rules, the name of the declared type is returned, or 'string' if there is none.
 * @param rule the given rule
 * @returns the name of the AST node type of the rule
 */
export function getRuleTypeName(rule) {
    if (ast.isTerminalRule(rule)) {
        const declaredName = rule.type == null ? undefined : rule.type.name;
        return declaredName == null ? 'string' : declaredName;
    }
    if (isDataTypeRule(rule)) {
        return rule.name;
    }
    const explicitType = getExplicitRuleType(rule);
    return explicitType == null ? rule.name : explicitType;
}
|
|
/**
 * This function is used at runtime to get the actual type of the values produced by the given rule.
 * For data type rules, the name of the declared return type of the rule is returned (if any),
 * e.g. "INT_value returns number: MY_INT;" returns "number".
 * For terminal rules, the name of the declared type is returned, or 'string' if there is none.
 * @param rule the given rule
 * @returns the name of the type of the produced values of the rule at runtime
 */
export function getRuleType(rule) {
    if (ast.isTerminalRule(rule)) {
        const declaredName = rule.type == null ? undefined : rule.type.name;
        return declaredName == null ? 'string' : declaredName;
    }
    const explicitType = getExplicitRuleType(rule);
    return explicitType == null ? rule.name : explicitType;
}
|
|
/**
 * Builds the regular expression that corresponds to the definition of the given terminal rule.
 *
 * @param terminalRule The terminal rule to convert.
 * @returns A `RegExp` built from the rule's definition. Its flags are those of `s`, `i` and `u`
 * that were switched on while the definition was converted.
 */
export function terminalRegex(terminalRule) {
    // Filled in by `abstractElementToRegex`; the key order determines the order of the flag text.
    const flags = { s: false, i: false, u: false };
    const source = abstractElementToRegex(terminalRule.definition, flags);
    let flagText = '';
    for (const [name, enabled] of Object.entries(flags)) {
        if (enabled) {
            flagText += name;
        }
    }
    return new RegExp(source, flagText);
}
|
|
// Regex source for a single arbitrary character. Unlike `.`, the class [\s\S] also matches line terminators.
const WILDCARD = '[\\s\\S]';
|
|
/**
 * Converts a terminal grammar element into regular expression source text.
 *
 * @param element The terminal element to convert.
 * @param flags Optional object with the boolean properties `i`, `s` and `u`. It is only written to if
 * `element` itself is a regex token; it is not passed on to nested elements.
 * @returns The regular expression source for the element, including its cardinality and lookahead.
 * @throws Error if a terminal rule call is unresolved or if the element kind is not supported.
 */
function abstractElementToRegex(element, flags) {
    if (ast.isTerminalAlternatives(element)) {
        return terminalAlternativesToRegex(element);
    }
    if (ast.isTerminalGroup(element)) {
        return terminalGroupToRegex(element);
    }
    if (ast.isCharacterRange(element)) {
        return characterRangeToRegex(element);
    }
    if (ast.isTerminalRuleCall(element)) {
        const calledRule = element.rule.ref;
        if (!calledRule) {
            throw new Error('Missing rule reference.');
        }
        const { cardinality, lookahead } = element;
        return withCardinality(abstractElementToRegex(calledRule.definition), { cardinality, lookahead });
    }
    if (ast.isNegatedToken(element)) {
        return negateTokenToRegex(element);
    }
    if (ast.isUntilToken(element)) {
        return untilTokenToRegex(element);
    }
    if (ast.isRegexToken(element)) {
        // The token text has the form `/source/flags`: split it at the last slash.
        const literal = element.regex;
        const closingSlash = literal.lastIndexOf('/');
        const source = literal.substring(1, closingSlash);
        const regexFlags = literal.substring(closingSlash + 1);
        if (flags) {
            flags.i = regexFlags.includes('i');
            flags.s = regexFlags.includes('s');
            flags.u = regexFlags.includes('u');
        }
        return withCardinality(source, {
            cardinality: element.cardinality,
            lookahead: element.lookahead,
            wrap: false
        });
    }
    if (ast.isWildcard(element)) {
        const { cardinality, lookahead } = element;
        return withCardinality(WILDCARD, { cardinality, lookahead });
    }
    throw new Error(`Invalid terminal element: ${element == null ? undefined : element.$type}`);
}
|
|
/**
 * Converts terminal alternatives into regular expression source: the converted elements joined with `|`.
 *
 * @param alternatives The alternatives to convert.
 * @returns The regular expression source, with the cardinality and lookahead of the alternatives applied.
 */
function terminalAlternativesToRegex(alternatives) {
    const branches = alternatives.elements.map(branch => abstractElementToRegex(branch));
    const { cardinality, lookahead } = alternatives;
    return withCardinality(branches.join('|'), { cardinality, lookahead });
}
|
|
/**
 * Converts a terminal group into regular expression source: the converted elements concatenated in order.
 *
 * @param group The group to convert.
 * @returns The regular expression source, with the cardinality and lookahead of the group applied.
 */
function terminalGroupToRegex(group) {
    const parts = group.elements.map(part => abstractElementToRegex(part));
    const { cardinality, lookahead } = group;
    return withCardinality(parts.join(''), { cardinality, lookahead });
}
|
|
/**
 * Converts an until token into regular expression source: a lazy run of arbitrary characters
 * followed by the converted terminal of the token.
 *
 * @param until The until token to convert.
 * @returns The regular expression source, with the cardinality and lookahead of the token applied.
 */
function untilTokenToRegex(until) {
    const terminator = abstractElementToRegex(until.terminal);
    const { cardinality, lookahead } = until;
    return withCardinality(`${WILDCARD}*?${terminator}`, { cardinality, lookahead });
}
|
|
/**
 * Converts a negated token into regular expression source: a negative lookahead for the converted
 * terminal of the token, followed by a lazy run of arbitrary characters.
 *
 * @param negate The negated token to convert.
 * @returns The regular expression source, with the cardinality and lookahead of the token applied.
 */
function negateTokenToRegex(negate) {
    const excluded = abstractElementToRegex(negate.terminal);
    const { cardinality, lookahead } = negate;
    return withCardinality(`(?!${excluded})${WILDCARD}*?`, { cardinality, lookahead });
}
|
|
/**
 * Converts a character range into regular expression source.
 *
 * @param range The character range to convert.
 * @returns A character class `[left-right]` if the range has a right bound, otherwise just the escaped
 * left keyword. The result is not wrapped in a group unless a lookahead is set.
 */
function characterRangeToRegex(range) {
    const source = range.right
        ? `[${keywordToRegex(range.left)}-${keywordToRegex(range.right)}]`
        : keywordToRegex(range.left);
    return withCardinality(source, {
        cardinality: range.cardinality,
        lookahead: range.lookahead,
        wrap: false
    });
}
|
|
/**
 * Converts a keyword into regular expression source by passing its value through `escapeRegExp`.
 *
 * @param keyword The keyword to convert.
 * @returns The result of `escapeRegExp` for the keyword's `value`.
 */
function keywordToRegex(keyword) {
    const { value } = keyword;
    return escapeRegExp(value);
}
|
|
/**
 * Applies grouping, lookahead and cardinality to a piece of regular expression source.
 *
 * @param regex The regular expression source to decorate.
 * @param options `cardinality` is appended if it is truthy. `lookahead` is inserted at the start of
 * the group if it is set. `wrap` suppresses the surrounding group when it is `false`, unless a
 * lookahead is present.
 * @returns The decorated regular expression source.
 */
function withCardinality(regex, options) {
    const { cardinality, lookahead, wrap } = options;
    let pattern = regex;
    if (wrap !== false || lookahead) {
        const prefix = lookahead == null ? '' : lookahead;
        pattern = `(${prefix}${pattern})`;
    }
    return cardinality ? `${pattern}${cardinality}` : pattern;
}
|
|
//# sourceMappingURL=grammar-utils.js.map
|