From 96de2d71d7055709d0c92d41c10101b777f48ed8 Mon Sep 17 00:00:00 2001 From: Jesse Rosenberger Date: Fri, 1 Feb 2019 18:14:30 +0200 Subject: [PATCH] Move Engine signature AST traversals/transforms into `./transforms` module. These AST visitors and transformations are more generally usable for other purposes rather than just the Apollo Engine signature reporting and would seem to belong in a module of their own. --- .../src/__tests__/signature.test.ts | 72 +------ .../src/__tests__/transforms.test.ts | 77 +++++++ packages/apollo-graphql/src/index.ts | 2 +- packages/apollo-graphql/src/signature.ts | 194 +---------------- packages/apollo-graphql/src/transforms.ts | 195 ++++++++++++++++++ 5 files changed, 284 insertions(+), 256 deletions(-) create mode 100644 packages/apollo-graphql/src/__tests__/transforms.test.ts create mode 100644 packages/apollo-graphql/src/transforms.ts diff --git a/packages/apollo-graphql/src/__tests__/signature.test.ts b/packages/apollo-graphql/src/__tests__/signature.test.ts index 7e3e7314..161e07ef 100644 --- a/packages/apollo-graphql/src/__tests__/signature.test.ts +++ b/packages/apollo-graphql/src/__tests__/signature.test.ts @@ -7,82 +7,12 @@ import { dropUnusedDefinitions, sortAST, removeAliases, -} from '../signature'; +} from '../transforms'; // The gql duplicate fragment warning feature really is just warnings; nothing // breaks if you turn it off in tests. disableFragmentWarnings(); -describe('printWithReducedWhitespace', () => { - const cases = [ - { - name: 'lots of whitespace', - // Note: there's a tab after "tab->", which prettier wants to keep as a - // literal tab rather than \t. In the output, there should be a literal - // backslash-t. 
- input: gql` - query Foo($a: Int) { - user( - name: " tab-> yay" - other: """ - apple - bag - cat - """ - ) { - name - } - } - `, - output: - 'query Foo($a:Int){user(name:" tab->\\tyay",other:"apple\\n bag\\ncat"){name}}', - }, - ]; - cases.forEach(({ name, input, output }) => { - test(name, () => { - expect(printWithReducedWhitespace(input)).toEqual(output); - }); - }); -}); - -describe('hideLiterals', () => { - const cases = [ - { - name: 'full test', - input: gql` - query Foo($b: Int, $a: Boolean) { - user(name: "hello", age: 5) { - ...Bar - ... on User { - hello - bee - } - tz - aliased: name - } - } - - fragment Bar on User { - age @skip(if: $a) - ...Nested - } - - fragment Nested on User { - blah - } - `, - output: - 'query Foo($b:Int,$a:Boolean){user(name:"",age:0){...Bar...on User{hello bee}tz aliased:name}}' + - 'fragment Bar on User{age@skip(if:$a)...Nested}fragment Nested on User{blah}', - }, - ]; - cases.forEach(({ name, input, output }) => { - test(name, () => { - expect(printWithReducedWhitespace(hideLiterals(input))).toEqual(output); - }); - }); -}); - describe('aggressive signature', () => { function aggressive(ast: DocumentNode, operationName: string): string { return printWithReducedWhitespace( diff --git a/packages/apollo-graphql/src/__tests__/transforms.test.ts b/packages/apollo-graphql/src/__tests__/transforms.test.ts new file mode 100644 index 00000000..e0ad5e47 --- /dev/null +++ b/packages/apollo-graphql/src/__tests__/transforms.test.ts @@ -0,0 +1,77 @@ +import { default as gql, disableFragmentWarnings } from 'graphql-tag'; + +import { printWithReducedWhitespace, hideLiterals } from '../transforms'; + +// The gql duplicate fragment warning feature really is just warnings; nothing +// breaks if you turn it off in tests. 
+disableFragmentWarnings(); + +describe('printWithReducedWhitespace', () => { + const cases = [ + { + name: 'lots of whitespace', + // Note: there's a tab after "tab->", which prettier wants to keep as a + // literal tab rather than \t. In the output, there should be a literal + // backslash-t. + input: gql` + query Foo($a: Int) { + user( + name: " tab-> yay" + other: """ + apple + bag + cat + """ + ) { + name + } + } + `, + output: + 'query Foo($a:Int){user(name:" tab->\\tyay",other:"apple\\n bag\\ncat"){name}}', + }, + ]; + cases.forEach(({ name, input, output }) => { + test(name, () => { + expect(printWithReducedWhitespace(input)).toEqual(output); + }); + }); +}); + +describe('hideLiterals', () => { + const cases = [ + { + name: 'full test', + input: gql` + query Foo($b: Int, $a: Boolean) { + user(name: "hello", age: 5) { + ...Bar + ... on User { + hello + bee + } + tz + aliased: name + } + } + + fragment Bar on User { + age @skip(if: $a) + ...Nested + } + + fragment Nested on User { + blah + } + `, + output: + 'query Foo($b:Int,$a:Boolean){user(name:"",age:0){...Bar...on User{hello bee}tz aliased:name}}' + + 'fragment Bar on User{age@skip(if:$a)...Nested}fragment Nested on User{blah}', + }, + ]; + cases.forEach(({ name, input, output }) => { + test(name, () => { + expect(printWithReducedWhitespace(hideLiterals(input))).toEqual(output); + }); + }); +}); diff --git a/packages/apollo-graphql/src/index.ts b/packages/apollo-graphql/src/index.ts index bfff26d0..8161d664 100644 --- a/packages/apollo-graphql/src/index.ts +++ b/packages/apollo-graphql/src/index.ts @@ -1 +1 @@ -export { defaultEngineReportingSignature } from './signature'; \ No newline at end of file +export { defaultEngineReportingSignature } from './signature'; diff --git a/packages/apollo-graphql/src/signature.ts b/packages/apollo-graphql/src/signature.ts index 6fa6f6d2..8ddc71fe 100644 --- a/packages/apollo-graphql/src/signature.ts +++ b/packages/apollo-graphql/src/signature.ts @@ -1,5 +1,3 @@ -// 
XXX maybe this should just be its own graphql-signature package - // In Engine, we want to group requests making the same query together, and // treat different queries distinctly. But what does it mean for two queries to // be "the same"? And what if you don't want to send the full text of the query @@ -16,9 +14,9 @@ // valid GraphQL query, though as of now the Engine servers do not re-parse your // signature and do not expect it to match the execution tree in the trace. // -// This file provides several useful building blocks for writing your own -// signature function. These are: -// +// This module utilizes several AST transformations from the adjacent +// 'transforms' module (also useful for writing your own signature method): +// // - dropUnusedDefinitions, which removes operations and fragments that // aren't going to be used in execution // - hideLiterals, which replaces all numeric and string literals as well @@ -46,186 +44,14 @@ // algorithm on it, and the details of the signature algorithm are now up to the // reporting agent. -import { sortBy, ListIteratee } from 'lodash'; - +import { DocumentNode } from 'graphql'; import { - print, - visit, - DocumentNode, - OperationDefinitionNode, - SelectionSetNode, - FieldNode, - FragmentSpreadNode, - InlineFragmentNode, - FragmentDefinitionNode, - DirectiveNode, - IntValueNode, - FloatValueNode, - StringValueNode, - ListValueNode, - ObjectValueNode, - separateOperations, -} from 'graphql'; - -// Replace numeric, string, list, and object literals with "empty" -// values. Leaves enums alone (since there's no consistent "zero" enum). This -// can help combine similar queries if you substitute values directly into -// queries rather than use GraphQL variables, and can hide sensitive data in -// your query (say, a hardcoded API key) from Engine servers, but in general -// avoiding those situations is better than working around them. 
-export function hideLiterals(ast: DocumentNode): DocumentNode { - return visit(ast, { - IntValue(node: IntValueNode): IntValueNode { - return { ...node, value: '0' }; - }, - FloatValue(node: FloatValueNode): FloatValueNode { - return { ...node, value: '0' }; - }, - StringValue(node: StringValueNode): StringValueNode { - return { ...node, value: '', block: false }; - }, - ListValue(node: ListValueNode): ListValueNode { - return { ...node, values: [] }; - }, - ObjectValue(node: ObjectValueNode): ObjectValueNode { - return { ...node, fields: [] }; - }, - }); -} - -// A GraphQL query may contain multiple named operations, with the operation to -// use specified separately by the client. This transformation drops unused -// operations from the query, as well as any fragment definitions that are not -// referenced. (In general we recommend that unused definitions are dropped on -// the client before sending to the server to save bandwidth and parsing time.) -export function dropUnusedDefinitions( - ast: DocumentNode, - operationName: string, -): DocumentNode { - const separated = separateOperations(ast)[operationName]; - if (!separated) { - // If the given operationName isn't found, just make this whole transform a - // no-op instead of crashing. - return ast; - } - return separated; -} - -// Like lodash's sortBy, but sorted(undefined) === undefined rather than []. It -// is a stable non-in-place sort. -function sorted( - items: ReadonlyArray | undefined, - ...iteratees: Array> -): Array | undefined { - if (items) { - return sortBy(items, ...iteratees); - } - return undefined; -} - -// sortAST sorts most multi-child nodes alphabetically. Using this as part of -// your signature calculation function may make it easier to tell the difference -// between queries that are similar to each other, and if for some reason your -// GraphQL client generates query strings with elements in nondeterministic -// order, it can make sure the queries are treated as identical. 
-export function sortAST(ast: DocumentNode): DocumentNode { - return visit(ast, { - OperationDefinition( - node: OperationDefinitionNode, - ): OperationDefinitionNode { - return { - ...node, - variableDefinitions: sorted( - node.variableDefinitions, - 'variable.name.value', - ), - }; - }, - SelectionSet(node: SelectionSetNode): SelectionSetNode { - return { - ...node, - // Define an ordering for field names in a SelectionSet. Field first, - // then FragmentSpread, then InlineFragment. By a lovely coincidence, - // the order we want them to appear in is alphabetical by node.kind. - // Use sortBy instead of sorted because 'selections' is not optional. - selections: sortBy(node.selections, 'kind', 'name.value'), - }; - }, - Field(node: FieldNode): FieldNode { - return { - ...node, - arguments: sorted(node.arguments, 'name.value'), - }; - }, - FragmentSpread(node: FragmentSpreadNode): FragmentSpreadNode { - return { ...node, directives: sorted(node.directives, 'name.value') }; - }, - InlineFragment(node: InlineFragmentNode): InlineFragmentNode { - return { ...node, directives: sorted(node.directives, 'name.value') }; - }, - FragmentDefinition(node: FragmentDefinitionNode): FragmentDefinitionNode { - return { - ...node, - directives: sorted(node.directives, 'name.value'), - variableDefinitions: sorted( - node.variableDefinitions, - 'variable.name.value', - ), - }; - }, - Directive(node: DirectiveNode): DirectiveNode { - return { ...node, arguments: sorted(node.arguments, 'name.value') }; - }, - }); -} - -// removeAliases gets rid of GraphQL aliases, a feature by which you can tell a -// server to return a field's data under a different name from the field -// name. Maybe this is useful if somebody somewhere inserts random aliases into -// their queries. 
-export function removeAliases(ast: DocumentNode): DocumentNode { - return visit(ast, { - Field(node: FieldNode): FieldNode { - return { - ...node, - alias: undefined, - }; - }, - }); -} - -// Like the graphql-js print function, but deleting whitespace wherever -// feasible. Specifically, all whitespace (outside of string literals) is -// reduced to at most one space, and even that space is removed anywhere except -// for between two alphanumerics. -export function printWithReducedWhitespace(ast: DocumentNode): string { - // In a GraphQL AST (which notably does not contain comments), the only place - // where meaningful whitespace (or double quotes) can exist is in - // StringNodes. So to print with reduced whitespace, we: - // - temporarily sanitize strings by replacing their contents with hex - // - use the default GraphQL printer - // - minimize the whitespace with a simple regexp replacement - // - convert strings back to their actual value - // We normalize all strings to non-block strings for simplicity. - - const sanitizedAST = visit(ast, { - StringValue(node: StringValueNode): StringValueNode { - return { - ...node, - value: Buffer.from(node.value, 'utf8').toString('hex'), - block: false, - }; - }, - }); - const withWhitespace = print(sanitizedAST); - const minimizedButStillHex = withWhitespace - .replace(/\s+/g, ' ') - .replace(/([^_a-zA-Z0-9]) /g, (_, c) => c) - .replace(/ ([^_a-zA-Z0-9])/g, (_, c) => c); - return minimizedButStillHex.replace(/"([a-f0-9]+)"/g, (_, hex) => - JSON.stringify(Buffer.from(hex, 'hex').toString('utf8')), - ); -} + printWithReducedWhitespace, + dropUnusedDefinitions, + removeAliases, + sortAST, + hideLiterals, +} from './transforms'; // The default signature function consists of removing unused definitions // and whitespace. 
diff --git a/packages/apollo-graphql/src/transforms.ts b/packages/apollo-graphql/src/transforms.ts new file mode 100644 index 00000000..77759db0 --- /dev/null +++ b/packages/apollo-graphql/src/transforms.ts @@ -0,0 +1,195 @@ +import { visit } from 'graphql/language/visitor'; +import { + DocumentNode, + FloatValueNode, + IntValueNode, + StringValueNode, + OperationDefinitionNode, + SelectionSetNode, + FragmentSpreadNode, + InlineFragmentNode, + DirectiveNode, + FieldNode, + FragmentDefinitionNode, + ObjectValueNode, + ListValueNode, +} from 'graphql/language/ast'; +import { print } from 'graphql/language/printer'; +import { separateOperations } from 'graphql/utilities'; +import { sortBy, ListIteratee } from 'lodash'; + +// Replace numeric, string, list, and object literals with "empty" +// values. Leaves enums alone (since there's no consistent "zero" enum). This +// can help combine similar queries if you substitute values directly into +// queries rather than use GraphQL variables, and can hide sensitive data in +// your query (say, a hardcoded API key) from Engine servers, but in general +// avoiding those situations is better than working around them. +export function hideLiterals(ast: DocumentNode): DocumentNode { + return visit(ast, { + IntValue(node: IntValueNode): IntValueNode { + return { ...node, value: '0' }; + }, + FloatValue(node: FloatValueNode): FloatValueNode { + return { ...node, value: '0' }; + }, + StringValue(node: StringValueNode): StringValueNode { + return { ...node, value: '', block: false }; + }, + ListValue(node: ListValueNode): ListValueNode { + return { ...node, values: [] }; + }, + ObjectValue(node: ObjectValueNode): ObjectValueNode { + return { ...node, fields: [] }; + }, + }); +} + +// In the same spirit as the similarly named `hideLiterals` function, only +// hide string and numeric literals. 
+export function hideStringAndNumericLiterals(ast: DocumentNode): DocumentNode { + return visit(ast, { + IntValue(node: IntValueNode): IntValueNode { + return { ...node, value: '0' }; + }, + FloatValue(node: FloatValueNode): FloatValueNode { + return { ...node, value: '0' }; + }, + StringValue(node: StringValueNode): StringValueNode { + return { ...node, value: '', block: false }; + }, + }); +} + +// A GraphQL query may contain multiple named operations, with the operation to +// use specified separately by the client. This transformation drops unused +// operations from the query, as well as any fragment definitions that are not +// referenced. (In general we recommend that unused definitions are dropped on +// the client before sending to the server to save bandwidth and parsing time.) +export function dropUnusedDefinitions( + ast: DocumentNode, + operationName: string, +): DocumentNode { + const separated = separateOperations(ast)[operationName]; + if (!separated) { + // If the given operationName isn't found, just make this whole transform a + // no-op instead of crashing. + return ast; + } + return separated; +} + +// Like lodash's sortBy, but sorted(undefined) === undefined rather than []. It +// is a stable non-in-place sort. +function sorted( + items: ReadonlyArray | undefined, + ...iteratees: Array> +): Array | undefined { + if (items) { + return sortBy(items, ...iteratees); + } + return undefined; +} + +// sortAST sorts most multi-child nodes alphabetically. Using this as part of +// your signature calculation function may make it easier to tell the difference +// between queries that are similar to each other, and if for some reason your +// GraphQL client generates query strings with elements in nondeterministic +// order, it can make sure the queries are treated as identical. 
+export function sortAST(ast: DocumentNode): DocumentNode { + return visit(ast, { + OperationDefinition( + node: OperationDefinitionNode, + ): OperationDefinitionNode { + return { + ...node, + variableDefinitions: sorted( + node.variableDefinitions, + 'variable.name.value', + ), + }; + }, + SelectionSet(node: SelectionSetNode): SelectionSetNode { + return { + ...node, + // Define an ordering for field names in a SelectionSet. Field first, + // then FragmentSpread, then InlineFragment. By a lovely coincidence, + // the order we want them to appear in is alphabetical by node.kind. + // Use sortBy instead of sorted because 'selections' is not optional. + selections: sortBy(node.selections, 'kind', 'name.value'), + }; + }, + Field(node: FieldNode): FieldNode { + return { + ...node, + arguments: sorted(node.arguments, 'name.value'), + }; + }, + FragmentSpread(node: FragmentSpreadNode): FragmentSpreadNode { + return { ...node, directives: sorted(node.directives, 'name.value') }; + }, + InlineFragment(node: InlineFragmentNode): InlineFragmentNode { + return { ...node, directives: sorted(node.directives, 'name.value') }; + }, + FragmentDefinition(node: FragmentDefinitionNode): FragmentDefinitionNode { + return { + ...node, + directives: sorted(node.directives, 'name.value'), + variableDefinitions: sorted( + node.variableDefinitions, + 'variable.name.value', + ), + }; + }, + Directive(node: DirectiveNode): DirectiveNode { + return { ...node, arguments: sorted(node.arguments, 'name.value') }; + }, + }); +} + +// removeAliases gets rid of GraphQL aliases, a feature by which you can tell a +// server to return a field's data under a different name from the field +// name. Maybe this is useful if somebody somewhere inserts random aliases into +// their queries. 
+export function removeAliases(ast: DocumentNode): DocumentNode { + return visit(ast, { + Field(node: FieldNode): FieldNode { + return { + ...node, + alias: undefined, + }; + }, + }); +} + +// Like the graphql-js print function, but deleting whitespace wherever +// feasible. Specifically, all whitespace (outside of string literals) is +// reduced to at most one space, and even that space is removed anywhere except +// for between two alphanumerics. +export function printWithReducedWhitespace(ast: DocumentNode): string { + // In a GraphQL AST (which notably does not contain comments), the only place + // where meaningful whitespace (or double quotes) can exist is in + // StringNodes. So to print with reduced whitespace, we: + // - temporarily sanitize strings by replacing their contents with hex + // - use the default GraphQL printer + // - minimize the whitespace with a simple regexp replacement + // - convert strings back to their actual value + // We normalize all strings to non-block strings for simplicity. + + const sanitizedAST = visit(ast, { + StringValue(node: StringValueNode): StringValueNode { + return { + ...node, + value: Buffer.from(node.value, 'utf8').toString('hex'), + block: false, + }; + }, + }); + const withWhitespace = print(sanitizedAST); + const minimizedButStillHex = withWhitespace + .replace(/\s+/g, ' ') + .replace(/([^_a-zA-Z0-9]) /g, (_, c) => c) + .replace(/ ([^_a-zA-Z0-9])/g, (_, c) => c); + return minimizedButStillHex.replace(/"([a-f0-9]+)"/g, (_, hex) => + JSON.stringify(Buffer.from(hex, 'hex').toString('utf8')), + ); +}