Move Engine signature AST traversals/transforms into ./transforms module.

These AST visitors and transformations are useful for more than just Apollo
Engine signature reporting, so they seem to belong in a module of their own.
This commit is contained in:
Jesse Rosenberger 2019-02-01 18:14:30 +02:00
parent e565226370
commit 96de2d71d7
No known key found for this signature in database
GPG key ID: C0CCCF81AA6C08D8
5 changed files with 284 additions and 256 deletions

View file

@ -7,82 +7,12 @@ import {
dropUnusedDefinitions,
sortAST,
removeAliases,
} from '../signature';
} from '../transforms';
// The gql duplicate fragment warning feature really is just warnings; nothing
// breaks if you turn it off in tests.
disableFragmentWarnings();
// Table-driven checks for printWithReducedWhitespace: each case parses `input`
// with gql and expects the printed result to equal `output` exactly.
describe('printWithReducedWhitespace', () => {
const cases = [
{
name: 'lots of whitespace',
// Note: there's a tab after "tab->", which prettier wants to keep as a
// literal tab rather than \t. In the output, there should be a literal
// backslash-t.
input: gql`
query Foo($a: Int) {
user(
name: " tab-> yay"
other: """
apple
bag
cat
"""
) {
name
}
}
`,
output:
'query Foo($a:Int){user(name:" tab->\\tyay",other:"apple\\n bag\\ncat"){name}}',
},
];
// Register one test per case, named after the case.
cases.forEach(({ name, input, output }) => {
test(name, () => {
expect(printWithReducedWhitespace(input)).toEqual(output);
});
});
});
// Table-driven checks for hideLiterals: the transformed document is printed
// with printWithReducedWhitespace and compared against `output`.
describe('hideLiterals', () => {
const cases = [
{
name: 'full test',
input: gql`
query Foo($b: Int, $a: Boolean) {
user(name: "hello", age: 5) {
...Bar
... on User {
hello
bee
}
tz
aliased: name
}
}
fragment Bar on User {
age @skip(if: $a)
...Nested
}
fragment Nested on User {
blah
}
`,
// The string and int arguments are expected to come back as "" and 0;
// everything else (including the alias) is expected unchanged.
output:
'query Foo($b:Int,$a:Boolean){user(name:"",age:0){...Bar...on User{hello bee}tz aliased:name}}' +
'fragment Bar on User{age@skip(if:$a)...Nested}fragment Nested on User{blah}',
},
];
// Register one test per case, named after the case.
cases.forEach(({ name, input, output }) => {
test(name, () => {
expect(printWithReducedWhitespace(hideLiterals(input))).toEqual(output);
});
});
});
describe('aggressive signature', () => {
function aggressive(ast: DocumentNode, operationName: string): string {
return printWithReducedWhitespace(

View file

@ -0,0 +1,77 @@
import { default as gql, disableFragmentWarnings } from 'graphql-tag';
import { printWithReducedWhitespace, hideLiterals } from '../transforms';
// The gql duplicate fragment warning feature really is just warnings; nothing
// breaks if you turn it off in tests.
disableFragmentWarnings();
// Table-driven checks for printWithReducedWhitespace: each case parses `input`
// with gql and expects the printed result to equal `output` exactly.
describe('printWithReducedWhitespace', () => {
const cases = [
{
name: 'lots of whitespace',
// Note: there's a tab after "tab->", which prettier wants to keep as a
// literal tab rather than \t. In the output, there should be a literal
// backslash-t.
input: gql`
query Foo($a: Int) {
user(
name: " tab-> yay"
other: """
apple
bag
cat
"""
) {
name
}
}
`,
output:
'query Foo($a:Int){user(name:" tab->\\tyay",other:"apple\\n bag\\ncat"){name}}',
},
];
// Register one test per case, named after the case.
cases.forEach(({ name, input, output }) => {
test(name, () => {
expect(printWithReducedWhitespace(input)).toEqual(output);
});
});
});
// Table-driven checks for hideLiterals: the transformed document is printed
// with printWithReducedWhitespace and compared against `output`.
describe('hideLiterals', () => {
const cases = [
{
name: 'full test',
input: gql`
query Foo($b: Int, $a: Boolean) {
user(name: "hello", age: 5) {
...Bar
... on User {
hello
bee
}
tz
aliased: name
}
}
fragment Bar on User {
age @skip(if: $a)
...Nested
}
fragment Nested on User {
blah
}
`,
// The string and int arguments are expected to come back as "" and 0;
// everything else (including the alias) is expected unchanged.
output:
'query Foo($b:Int,$a:Boolean){user(name:"",age:0){...Bar...on User{hello bee}tz aliased:name}}' +
'fragment Bar on User{age@skip(if:$a)...Nested}fragment Nested on User{blah}',
},
];
// Register one test per case, named after the case.
cases.forEach(({ name, input, output }) => {
test(name, () => {
expect(printWithReducedWhitespace(hideLiterals(input))).toEqual(output);
});
});
});

View file

@ -1 +1 @@
export { defaultEngineReportingSignature } from './signature';
export { defaultEngineReportingSignature } from './signature';

View file

@ -1,5 +1,3 @@
// XXX maybe this should just be its own graphql-signature package
// In Engine, we want to group requests making the same query together, and
// treat different queries distinctly. But what does it mean for two queries to
// be "the same"? And what if you don't want to send the full text of the query
@ -16,9 +14,9 @@
// valid GraphQL query, though as of now the Engine servers do not re-parse your
// signature and do not expect it to match the execution tree in the trace.
//
// This file provides several useful building blocks for writing your own
// signature function. These are:
//
// This module utilizes several AST transformations from the adjacent
// 'transforms' module (which are also useful for writing your own signature
// method). These include:
// - dropUnusedDefinitions, which removes operations and fragments that
// aren't going to be used in execution
// - hideLiterals, which replaces all numeric and string literals as well
@ -46,186 +44,14 @@
// algorithm on it, and the details of the signature algorithm are now up to the
// reporting agent.
import { sortBy, ListIteratee } from 'lodash';
import { DocumentNode } from 'graphql';
import {
print,
visit,
DocumentNode,
OperationDefinitionNode,
SelectionSetNode,
FieldNode,
FragmentSpreadNode,
InlineFragmentNode,
FragmentDefinitionNode,
DirectiveNode,
IntValueNode,
FloatValueNode,
StringValueNode,
ListValueNode,
ObjectValueNode,
separateOperations,
} from 'graphql';
// hideLiterals blanks out every literal value in a document: ints and floats
// become 0, strings become "" (block strings are turned into plain strings),
// and list and object literals are emptied. Enum values are kept, because
// there is no consistent "zero" value for an enum. Queries that inline values
// instead of using GraphQL variables then collapse to the same text, and
// sensitive data hardcoded in a query (say, an API key) is kept away from
// Engine servers. In general, avoiding those situations is better than
// working around them.
export function hideLiterals(ast: DocumentNode): DocumentNode {
  const literalScrubber = {
    IntValue: (node: IntValueNode): IntValueNode => ({ ...node, value: '0' }),
    FloatValue: (node: FloatValueNode): FloatValueNode => ({
      ...node,
      value: '0',
    }),
    StringValue: (node: StringValueNode): StringValueNode => ({
      ...node,
      value: '',
      block: false,
    }),
    ListValue: (node: ListValueNode): ListValueNode => ({
      ...node,
      values: [],
    }),
    ObjectValue: (node: ObjectValueNode): ObjectValueNode => ({
      ...node,
      fields: [],
    }),
  };
  return visit(ast, literalScrubber);
}
// A single GraphQL document can carry several named operations, and the client
// says separately which one to run. dropUnusedDefinitions keeps only the
// operation called `operationName` plus the fragment definitions it references,
// discarding everything else. (We generally recommend stripping unused
// definitions on the client before sending, to save bandwidth and parsing
// time.)
export function dropUnusedDefinitions(
  ast: DocumentNode,
  operationName: string,
): DocumentNode {
  const documentsByOperation = separateOperations(ast);
  // When no operation with this name exists, hand the document back untouched
  // rather than crashing.
  return documentsByOperation[operationName] || ast;
}
// A thin wrapper over lodash's sortBy that passes `undefined` through as
// `undefined` instead of turning it into []. As with sortBy, the sort is stable
// and returns a new array rather than sorting in place.
function sorted<T>(
  items: ReadonlyArray<T> | undefined,
  ...iteratees: Array<ListIteratee<T>>
): Array<T> | undefined {
  return items ? sortBy(items, ...iteratees) : undefined;
}
// sortAST puts the children of most multi-child nodes into alphabetical order.
// Including it in a signature function can make similar queries easier to tell
// apart, and if a GraphQL client emits query elements in nondeterministic
// order it ensures those queries are still treated as identical.
export function sortAST(ast: DocumentNode): DocumentNode {
  // lodash property paths used as sort keys below.
  const byName = 'name.value';
  const byVariableName = 'variable.name.value';

  return visit(ast, {
    OperationDefinition: (
      node: OperationDefinitionNode,
    ): OperationDefinitionNode => ({
      ...node,
      variableDefinitions: sorted(node.variableDefinitions, byVariableName),
    }),
    SelectionSet: (node: SelectionSetNode): SelectionSetNode => ({
      ...node,
      // Selections are ordered Field, then FragmentSpread, then
      // InlineFragment, which happens to be alphabetical order of node.kind;
      // ties are broken by name. 'selections' is never undefined, so plain
      // sortBy is used here instead of sorted.
      selections: sortBy(node.selections, 'kind', byName),
    }),
    Field: (node: FieldNode): FieldNode => ({
      ...node,
      arguments: sorted(node.arguments, byName),
    }),
    FragmentSpread: (node: FragmentSpreadNode): FragmentSpreadNode => ({
      ...node,
      directives: sorted(node.directives, byName),
    }),
    InlineFragment: (node: InlineFragmentNode): InlineFragmentNode => ({
      ...node,
      directives: sorted(node.directives, byName),
    }),
    FragmentDefinition: (
      node: FragmentDefinitionNode,
    ): FragmentDefinitionNode => ({
      ...node,
      directives: sorted(node.directives, byName),
      variableDefinitions: sorted(node.variableDefinitions, byVariableName),
    }),
    Directive: (node: DirectiveNode): DirectiveNode => ({
      ...node,
      arguments: sorted(node.arguments, byName),
    }),
  });
}
// removeAliases strips GraphQL aliases, the feature that lets a query ask the
// server to return a field's data under a name other than the field's own.
// This may be useful if something upstream inserts random aliases into
// queries.
export function removeAliases(ast: DocumentNode): DocumentNode {
  const stripAlias = (field: FieldNode): FieldNode => ({
    ...field,
    alias: undefined,
  });
  return visit(ast, { Field: stripAlias });
}
// A variant of the graphql-js print function that strips whitespace wherever
// feasible. Outside of string literals, every run of whitespace collapses to a
// single space, and that space is dropped too unless it separates two
// alphanumeric (or underscore) characters.
export function printWithReducedWhitespace(ast: DocumentNode): string {
  // A GraphQL AST has no comments, so string values are the only place where
  // whitespace (or double quotes) can be meaningful. The approach is:
  //   1. hex-encode the contents of every string so they are inert,
  //   2. print with the stock GraphQL printer,
  //   3. squeeze the whitespace with plain regexp replacements,
  //   4. decode the hex back into the real string values.
  // Every string is normalized to a non-block string for simplicity.
  const hexEncodedAST = visit(ast, {
    StringValue: (node: StringValueNode): StringValueNode => ({
      ...node,
      value: Buffer.from(node.value, 'utf8').toString('hex'),
      block: false,
    }),
  });

  let text = print(hexEncodedAST);
  text = text.replace(/\s+/g, ' ');
  text = text.replace(/([^_a-zA-Z0-9]) /g, (_, kept) => kept);
  text = text.replace(/ ([^_a-zA-Z0-9])/g, (_, kept) => kept);

  return text.replace(/"([a-f0-9]+)"/g, (_, encoded) =>
    JSON.stringify(Buffer.from(encoded, 'hex').toString('utf8')),
  );
}
printWithReducedWhitespace,
dropUnusedDefinitions,
removeAliases,
sortAST,
hideLiterals,
} from './transforms';
// The default signature function consists of removing unused definitions
// and whitespace.

View file

@ -0,0 +1,195 @@
import { visit } from 'graphql/language/visitor';
import {
DocumentNode,
FloatValueNode,
IntValueNode,
StringValueNode,
OperationDefinitionNode,
SelectionSetNode,
FragmentSpreadNode,
InlineFragmentNode,
DirectiveNode,
FieldNode,
FragmentDefinitionNode,
ObjectValueNode,
ListValueNode,
} from 'graphql/language/ast';
import { print } from 'graphql/language/printer';
import { separateOperations } from 'graphql/utilities';
import { sortBy, ListIteratee } from 'lodash';
// hideLiterals blanks out every literal value in a document: ints and floats
// become 0, strings become "" (block strings are turned into plain strings),
// and list and object literals are emptied. Enum values are kept, because
// there is no consistent "zero" value for an enum. Queries that inline values
// instead of using GraphQL variables then collapse to the same text, and
// sensitive data hardcoded in a query (say, an API key) is kept away from
// Engine servers. In general, avoiding those situations is better than
// working around them.
export function hideLiterals(ast: DocumentNode): DocumentNode {
  const literalScrubber = {
    IntValue: (node: IntValueNode): IntValueNode => ({ ...node, value: '0' }),
    FloatValue: (node: FloatValueNode): FloatValueNode => ({
      ...node,
      value: '0',
    }),
    StringValue: (node: StringValueNode): StringValueNode => ({
      ...node,
      value: '',
      block: false,
    }),
    ListValue: (node: ListValueNode): ListValueNode => ({
      ...node,
      values: [],
    }),
    ObjectValue: (node: ObjectValueNode): ObjectValueNode => ({
      ...node,
      fields: [],
    }),
  };
  return visit(ast, literalScrubber);
}
// A narrower sibling of `hideLiterals`: only string and numeric literals are
// blanked (ints and floats become 0, strings become "" and lose their block
// form). List and object literals are not replaced by this function.
export function hideStringAndNumericLiterals(ast: DocumentNode): DocumentNode {
  const scalarScrubber = {
    IntValue: (node: IntValueNode): IntValueNode => ({ ...node, value: '0' }),
    FloatValue: (node: FloatValueNode): FloatValueNode => ({
      ...node,
      value: '0',
    }),
    StringValue: (node: StringValueNode): StringValueNode => ({
      ...node,
      value: '',
      block: false,
    }),
  };
  return visit(ast, scalarScrubber);
}
// A single GraphQL document can carry several named operations, and the client
// says separately which one to run. dropUnusedDefinitions keeps only the
// operation called `operationName` plus the fragment definitions it references,
// discarding everything else. (We generally recommend stripping unused
// definitions on the client before sending, to save bandwidth and parsing
// time.)
export function dropUnusedDefinitions(
  ast: DocumentNode,
  operationName: string,
): DocumentNode {
  const documentsByOperation = separateOperations(ast);
  // When no operation with this name exists, hand the document back untouched
  // rather than crashing.
  return documentsByOperation[operationName] || ast;
}
// A thin wrapper over lodash's sortBy that passes `undefined` through as
// `undefined` instead of turning it into []. As with sortBy, the sort is stable
// and returns a new array rather than sorting in place.
function sorted<T>(
  items: ReadonlyArray<T> | undefined,
  ...iteratees: Array<ListIteratee<T>>
): Array<T> | undefined {
  return items ? sortBy(items, ...iteratees) : undefined;
}
// sortAST puts the children of most multi-child nodes into alphabetical order.
// Including it in a signature function can make similar queries easier to tell
// apart, and if a GraphQL client emits query elements in nondeterministic
// order it ensures those queries are still treated as identical.
export function sortAST(ast: DocumentNode): DocumentNode {
  // lodash property paths used as sort keys below.
  const byName = 'name.value';
  const byVariableName = 'variable.name.value';

  return visit(ast, {
    OperationDefinition: (
      node: OperationDefinitionNode,
    ): OperationDefinitionNode => ({
      ...node,
      variableDefinitions: sorted(node.variableDefinitions, byVariableName),
    }),
    SelectionSet: (node: SelectionSetNode): SelectionSetNode => ({
      ...node,
      // Selections are ordered Field, then FragmentSpread, then
      // InlineFragment, which happens to be alphabetical order of node.kind;
      // ties are broken by name. 'selections' is never undefined, so plain
      // sortBy is used here instead of sorted.
      selections: sortBy(node.selections, 'kind', byName),
    }),
    Field: (node: FieldNode): FieldNode => ({
      ...node,
      arguments: sorted(node.arguments, byName),
    }),
    FragmentSpread: (node: FragmentSpreadNode): FragmentSpreadNode => ({
      ...node,
      directives: sorted(node.directives, byName),
    }),
    InlineFragment: (node: InlineFragmentNode): InlineFragmentNode => ({
      ...node,
      directives: sorted(node.directives, byName),
    }),
    FragmentDefinition: (
      node: FragmentDefinitionNode,
    ): FragmentDefinitionNode => ({
      ...node,
      directives: sorted(node.directives, byName),
      variableDefinitions: sorted(node.variableDefinitions, byVariableName),
    }),
    Directive: (node: DirectiveNode): DirectiveNode => ({
      ...node,
      arguments: sorted(node.arguments, byName),
    }),
  });
}
// removeAliases strips GraphQL aliases, the feature that lets a query ask the
// server to return a field's data under a name other than the field's own.
// This may be useful if something upstream inserts random aliases into
// queries.
export function removeAliases(ast: DocumentNode): DocumentNode {
  const stripAlias = (field: FieldNode): FieldNode => ({
    ...field,
    alias: undefined,
  });
  return visit(ast, { Field: stripAlias });
}
// A variant of the graphql-js print function that strips whitespace wherever
// feasible. Outside of string literals, every run of whitespace collapses to a
// single space, and that space is dropped too unless it separates two
// alphanumeric (or underscore) characters.
export function printWithReducedWhitespace(ast: DocumentNode): string {
  // A GraphQL AST has no comments, so string values are the only place where
  // whitespace (or double quotes) can be meaningful. The approach is:
  //   1. hex-encode the contents of every string so they are inert,
  //   2. print with the stock GraphQL printer,
  //   3. squeeze the whitespace with plain regexp replacements,
  //   4. decode the hex back into the real string values.
  // Every string is normalized to a non-block string for simplicity.
  const hexEncodedAST = visit(ast, {
    StringValue: (node: StringValueNode): StringValueNode => ({
      ...node,
      value: Buffer.from(node.value, 'utf8').toString('hex'),
      block: false,
    }),
  });

  let text = print(hexEncodedAST);
  text = text.replace(/\s+/g, ' ');
  text = text.replace(/([^_a-zA-Z0-9]) /g, (_, kept) => kept);
  text = text.replace(/ ([^_a-zA-Z0-9])/g, (_, kept) => kept);

  return text.replace(/"([a-f0-9]+)"/g, (_, encoded) =>
    JSON.stringify(Buffer.from(encoded, 'hex').toString('utf8')),
  );
}