|
|
|
// Copyright (c) 2020, The Garble Authors.
|
|
|
|
// See LICENSE for licensing information.
|
|
|
|
|
|
|
|
package literals
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"go/ast"
|
|
|
|
"go/token"
|
|
|
|
"go/types"
|
|
|
|
mathrand "math/rand"
|
|
|
|
"strconv"
|
|
|
|
|
|
|
|
"golang.org/x/tools/go/ast/astutil"
|
|
|
|
ah "mvdan.cc/garble/internal/asthelper"
|
|
|
|
)
|
|
|
|
|
do not try to obfuscate huge literals (#204)
It's common for asset bundling code generators to produce huge literals,
for example in strings. Our literal obfuscators are meant for relatively
small string-like literals that a human would write, such as URLs, file
paths, and English text.
I ran some quick experiments, and it seems like "garble build -literals"
appears to hang trying to obfuscate literals starting at 5-20KiB. It's
not really hung; it's just doing a lot of busy work obfuscating those
literals. The code it produces is also far from ideal, so it also takes
some time to finally compile.
The generated code also led to crashes. For example, using "garble build
-literals -tiny" on a package containing literals of over a megabyte,
our use of asthelper to remove comments and shuffle line numbers could
run out of stack memory.
This all points in one direction: we never designed "-literals" to deal
with large sizes. Set a source-code-size limit of 2KiB.
We alter the literals.txt test as well, to include a few 128KiB string
literals. Before this fix, "go test" would seemingly hang on that test
for over a minute (I did not wait any longer). With the fix, those large
literals are not obfuscated, so the test ends in its usual 1-3s.
As said in the const comment, I don't believe any of this is a big
problem. Come Go 1.16, most developers should stop using asset-bundling
code generators and use go:embed instead. If we wanted to somehow
obfuscate those, it would be an entirely separate feature.
And, if someone wants to work on obfuscating truly large literals for
any reason, we need good tests and benchmarks to ensure garble does not
consume CPU for minutes or run out of memory.
I also simplified the generate-literals test command. The only argument
that matters to the script is the filename, since it's used later on.
Fixes #178.
4 years ago
|
|
|
// maxSizeBytes is the limit, in bytes, of the size of string-like literals
|
|
|
|
// which we will obfuscate. This is important, because otherwise garble can take
|
|
|
|
// a very long time to obfuscate huge code-generated literals, such as those
|
|
|
|
// corresponding to large assets.
|
|
|
|
//
|
|
|
|
// Note that this is the size of the literal in source code. For example, "\xab"
|
|
|
|
// counts as four bytes.
|
|
|
|
//
|
|
|
|
// If someone truly wants to obfuscate those, they should do that when they
|
|
|
|
// generate the code, not at build time. Plus, with Go 1.16 that technique
|
|
|
|
// should largely stop being used.
|
|
|
|
const maxSizeBytes = 2 << 10 // KiB
|
|
|
|
|
|
|
|
func randObfuscator() obfuscator {
|
|
|
|
randPos := mathrand.Intn(len(obfuscators))
|
|
|
|
return obfuscators[randPos]
|
|
|
|
}
|
|
|
|
|
|
|
|
// Obfuscate replaces literals with obfuscated anonymous functions.
|
|
|
|
func Obfuscate(file *ast.File, info *types.Info, fset *token.FileSet, ignoreObj map[types.Object]bool) *ast.File {
|
|
|
|
pre := func(cursor *astutil.Cursor) bool {
|
|
|
|
switch x := cursor.Node().(type) {
|
|
|
|
case *ast.GenDecl:
|
|
|
|
if x.Tok != token.CONST {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
for _, spec := range x.Specs {
|
|
|
|
spec := spec.(*ast.ValueSpec) // guaranteed for Tok==CONST
|
|
|
|
if len(spec.Values) == 0 {
|
|
|
|
// skip constants with inferred values
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, name := range spec.Names {
|
|
|
|
obj := info.ObjectOf(name)
|
|
|
|
|
|
|
|
basic, ok := obj.Type().(*types.Basic)
|
|
|
|
if !ok {
|
|
|
|
// skip the block if it contains non basic types
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
if basic.Info()&types.IsUntyped != 0 {
|
|
|
|
// skip the block if it contains untyped constants
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
// The object cannot be obfuscated, e.g. a value that needs to be constant
|
|
|
|
if ignoreObj[obj] {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
x.Tok = token.VAR
|
|
|
|
// constants are not possible if we want to obfuscate literals, therefore
|
|
|
|
// move all constant blocks which only contain strings to variables
|
|
|
|
}
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
post := func(cursor *astutil.Cursor) bool {
|
|
|
|
switch x := cursor.Node().(type) {
|
|
|
|
case *ast.CompositeLit:
|
|
|
|
byteType := types.Universe.Lookup("byte").Type()
|
|
|
|
|
|
|
|
if len(x.Elts) == 0 {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
switch y := info.TypeOf(x.Type).(type) {
|
|
|
|
case *types.Array:
|
|
|
|
if y.Elem() != byteType {
|
|
|
|
return true
|
|
|
|
}
|
do not try to obfuscate huge literals (#204)
It's common for asset bundling code generators to produce huge literals,
for example in strings. Our literal obfuscators are meant for relatively
small string-like literals that a human would write, such as URLs, file
paths, and English text.
I ran some quick experiments, and it seems like "garble build -literals"
appears to hang trying to obfuscate literals starting at 5-20KiB. It's
not really hung; it's just doing a lot of busy work obfuscating those
literals. The code it produces is also far from ideal, so it also takes
some time to finally compile.
The generated code also led to crashes. For example, using "garble build
-literals -tiny" on a package containing literals of over a megabyte,
our use of asthelper to remove comments and shuffle line numbers could
run out of stack memory.
This all points in one direction: we never designed "-literals" to deal
with large sizes. Set a source-code-size limit of 2KiB.
We alter the literals.txt test as well, to include a few 128KiB string
literals. Before this fix, "go test" would seemingly hang on that test
for over a minute (I did not wait any longer). With the fix, those large
literals are not obfuscated, so the test ends in its usual 1-3s.
As said in the const comment, I don't believe any of this is a big
problem. Come Go 1.16, most developers should stop using asset-bundling
code generators and use go:embed instead. If we wanted to somehow
obfuscate those, it would be an entirely separate feature.
And, if someone wants to work on obfuscating truly large literals for
any reason, we need good tests and benchmarks to ensure garble does not
consume CPU for minutes or run out of memory.
I also simplified the generate-literals test command. The only argument
that matters to the script is the filename, since it's used later on.
Fixes #178.
4 years ago
|
|
|
if y.Len() > maxSizeBytes {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
data := make([]byte, y.Len())
|
|
|
|
|
|
|
|
for i, el := range x.Elts {
|
|
|
|
lit, ok := el.(*ast.BasicLit)
|
|
|
|
if !ok {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
value, err := strconv.Atoi(lit.Value)
|
|
|
|
if err != nil {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
data[i] = byte(value)
|
|
|
|
}
|
set positions when using cursor.Replace
The regular obfuscation process simply modifies some simple nodes, such
as identifiers and strings. In those cases, we modify the nodes
in-place, meaning that their positions remain the same. This hasn't
caused any problems.
Literal obfuscation is trickier. Since we replace one expression with an
entirely different one, we use cursor.Replace. The new expression is
entirely made up on the spot, so it lacks position information.
This was causing problems. For example, in the added test input:
> garble -literals build
[stderr]
# test/main
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:4: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:6: misplaced compiler directive
dgcm4t6w.go:7: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:9: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:3: too many errors
The build errors are because we'd move the compiler directives, which
makes the compiler unhappy as they must be directly followed by a
function declaration.
The root cause there seems to be that, since the replacement nodes lack
position information, go/printer would try to estimate its printing
position by adding to the last known position. Since -literals adds
code, this would result in the printer position increasing rapidly, and
potentially printing directive comments earlier than needed.
For now, making the replacement nodes have the same position as the
original node seems to stop go/printer from making this mistake.
It's possible that this workaround won't be bulletproof forever, but it
works well for now, and I don't see a simpler workaround right now.
It would be possible to use fancier mechanisms like go/ast.CommentMap or
dave/dst, but those are a significant amount of added complexity as well.
Fixes #285.
4 years ago
|
|
|
cursor.Replace(withPos(obfuscateByteArray(data, y.Len()), x.Pos()))
|
|
|
|
|
|
|
|
case *types.Slice:
|
|
|
|
if y.Elem() != byteType {
|
|
|
|
return true
|
|
|
|
}
|
do not try to obfuscate huge literals (#204)
It's common for asset bundling code generators to produce huge literals,
for example in strings. Our literal obfuscators are meant for relatively
small string-like literals that a human would write, such as URLs, file
paths, and English text.
I ran some quick experiments, and it seems like "garble build -literals"
appears to hang trying to obfuscate literals starting at 5-20KiB. It's
not really hung; it's just doing a lot of busy work obfuscating those
literals. The code it produces is also far from ideal, so it also takes
some time to finally compile.
The generated code also led to crashes. For example, using "garble build
-literals -tiny" on a package containing literals of over a megabyte,
our use of asthelper to remove comments and shuffle line numbers could
run out of stack memory.
This all points in one direction: we never designed "-literals" to deal
with large sizes. Set a source-code-size limit of 2KiB.
We alter the literals.txt test as well, to include a few 128KiB string
literals. Before this fix, "go test" would seemingly hang on that test
for over a minute (I did not wait any longer). With the fix, those large
literals are not obfuscated, so the test ends in its usual 1-3s.
As said in the const comment, I don't believe any of this is a big
problem. Come Go 1.16, most developers should stop using asset-bundling
code generators and use go:embed instead. If we wanted to somehow
obfuscate those, it would be an entirely separate feature.
And, if someone wants to work on obfuscating truly large literals for
any reason, we need good tests and benchmarks to ensure garble does not
consume CPU for minutes or run out of memory.
I also simplified the generate-literals test command. The only argument
that matters to the script is the filename, since it's used later on.
Fixes #178.
4 years ago
|
|
|
if len(x.Elts) > maxSizeBytes {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
data := make([]byte, 0, len(x.Elts))
|
|
|
|
|
|
|
|
for _, el := range x.Elts {
|
|
|
|
lit, ok := el.(*ast.BasicLit)
|
|
|
|
if !ok {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
value, err := strconv.Atoi(lit.Value)
|
|
|
|
if err != nil {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
data = append(data, byte(value))
|
|
|
|
}
|
set positions when using cursor.Replace
The regular obfuscation process simply modifies some simple nodes, such
as identifiers and strings. In those cases, we modify the nodes
in-place, meaning that their positions remain the same. This hasn't
caused any problems.
Literal obfuscation is trickier. Since we replace one expression with an
entirely different one, we use cursor.Replace. The new expression is
entirely made up on the spot, so it lacks position information.
This was causing problems. For example, in the added test input:
> garble -literals build
[stderr]
# test/main
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:4: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:6: misplaced compiler directive
dgcm4t6w.go:7: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:9: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:3: too many errors
The build errors are because we'd move the compiler directives, which
makes the compiler unhappy as they must be directly followed by a
function declaration.
The root cause there seems to be that, since the replacement nodes lack
position information, go/printer would try to estimate its printing
position by adding to the last known position. Since -literals adds
code, this would result in the printer position increasing rapidly, and
potentially printing directive comments earlier than needed.
For now, making the replacement nodes have the same position as the
original node seems to stop go/printer from making this mistake.
It's possible that this workaround won't be bulletproof forever, but it
works well for now, and I don't see a simpler workaround right now.
It would be possible to use fancier mechanisms like go/ast.CommentMap or
dave/dst, but those are a significant amount of added complexity as well.
Fixes #285.
4 years ago
|
|
|
cursor.Replace(withPos(obfuscateByteSlice(data), x.Pos()))
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
case *ast.BasicLit:
|
|
|
|
switch cursor.Name() {
|
|
|
|
case "Values", "Rhs", "Value", "Args", "X", "Y", "Results", "Elts":
|
|
|
|
default:
|
|
|
|
return true // we don't want to obfuscate imports etc.
|
|
|
|
}
|
|
|
|
|
do not try to obfuscate huge literals (#204)
It's common for asset bundling code generators to produce huge literals,
for example in strings. Our literal obfuscators are meant for relatively
small string-like literals that a human would write, such as URLs, file
paths, and English text.
I ran some quick experiments, and it seems like "garble build -literals"
appears to hang trying to obfuscate literals starting at 5-20KiB. It's
not really hung; it's just doing a lot of busy work obfuscating those
literals. The code it produces is also far from ideal, so it also takes
some time to finally compile.
The generated code also led to crashes. For example, using "garble build
-literals -tiny" on a package containing literals of over a megabyte,
our use of asthelper to remove comments and shuffle line numbers could
run out of stack memory.
This all points in one direction: we never designed "-literals" to deal
with large sizes. Set a source-code-size limit of 2KiB.
We alter the literals.txt test as well, to include a few 128KiB string
literals. Before this fix, "go test" would seemingly hang on that test
for over a minute (I did not wait any longer). With the fix, those large
literals are not obfuscated, so the test ends in its usual 1-3s.
As said in the const comment, I don't believe any of this is a big
problem. Come Go 1.16, most developers should stop using asset-bundling
code generators and use go:embed instead. If we wanted to somehow
obfuscate those, it would be an entirely separate feature.
And, if someone wants to work on obfuscating truly large literals for
any reason, we need good tests and benchmarks to ensure garble does not
consume CPU for minutes or run out of memory.
I also simplified the generate-literals test command. The only argument
that matters to the script is the filename, since it's used later on.
Fixes #178.
4 years ago
|
|
|
if x.Kind != token.STRING {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
if len(x.Value) > maxSizeBytes {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
typeInfo := info.TypeOf(x)
|
|
|
|
if typeInfo != types.Typ[types.String] && typeInfo != types.Typ[types.UntypedString] {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
value, err := strconv.Unquote(x.Value)
|
|
|
|
if err != nil {
|
|
|
|
panic(fmt.Sprintf("cannot unquote string: %v", err))
|
|
|
|
}
|
|
|
|
|
do not try to obfuscate huge literals (#204)
It's common for asset bundling code generators to produce huge literals,
for example in strings. Our literal obfuscators are meant for relatively
small string-like literals that a human would write, such as URLs, file
paths, and English text.
I ran some quick experiments, and it seems like "garble build -literals"
appears to hang trying to obfuscate literals starting at 5-20KiB. It's
not really hung; it's just doing a lot of busy work obfuscating those
literals. The code it produces is also far from ideal, so it also takes
some time to finally compile.
The generated code also led to crashes. For example, using "garble build
-literals -tiny" on a package containing literals of over a megabyte,
our use of asthelper to remove comments and shuffle line numbers could
run out of stack memory.
This all points in one direction: we never designed "-literals" to deal
with large sizes. Set a source-code-size limit of 2KiB.
We alter the literals.txt test as well, to include a few 128KiB string
literals. Before this fix, "go test" would seemingly hang on that test
for over a minute (I did not wait any longer). With the fix, those large
literals are not obfuscated, so the test ends in its usual 1-3s.
As said in the const comment, I don't believe any of this is a big
problem. Come Go 1.16, most developers should stop using asset-bundling
code generators and use go:embed instead. If we wanted to somehow
obfuscate those, it would be an entirely separate feature.
And, if someone wants to work on obfuscating truly large literals for
any reason, we need good tests and benchmarks to ensure garble does not
consume CPU for minutes or run out of memory.
I also simplified the generate-literals test command. The only argument
that matters to the script is the filename, since it's used later on.
Fixes #178.
4 years ago
|
|
|
if len(value) == 0 {
|
|
|
|
return true
|
|
|
|
}
|
do not try to obfuscate huge literals (#204)
It's common for asset bundling code generators to produce huge literals,
for example in strings. Our literal obfuscators are meant for relatively
small string-like literals that a human would write, such as URLs, file
paths, and English text.
I ran some quick experiments, and it seems like "garble build -literals"
appears to hang trying to obfuscate literals starting at 5-20KiB. It's
not really hung; it's just doing a lot of busy work obfuscating those
literals. The code it produces is also far from ideal, so it also takes
some time to finally compile.
The generated code also led to crashes. For example, using "garble build
-literals -tiny" on a package containing literals of over a megabyte,
our use of asthelper to remove comments and shuffle line numbers could
run out of stack memory.
This all points in one direction: we never designed "-literals" to deal
with large sizes. Set a source-code-size limit of 2KiB.
We alter the literals.txt test as well, to include a few 128KiB string
literals. Before this fix, "go test" would seemingly hang on that test
for over a minute (I did not wait any longer). With the fix, those large
literals are not obfuscated, so the test ends in its usual 1-3s.
As said in the const comment, I don't believe any of this is a big
problem. Come Go 1.16, most developers should stop using asset-bundling
code generators and use go:embed instead. If we wanted to somehow
obfuscate those, it would be an entirely separate feature.
And, if someone wants to work on obfuscating truly large literals for
any reason, we need good tests and benchmarks to ensure garble does not
consume CPU for minutes or run out of memory.
I also simplified the generate-literals test command. The only argument
that matters to the script is the filename, since it's used later on.
Fixes #178.
4 years ago
|
|
|
|
set positions when using cursor.Replace
The regular obfuscation process simply modifies some simple nodes, such
as identifiers and strings. In those cases, we modify the nodes
in-place, meaning that their positions remain the same. This hasn't
caused any problems.
Literal obfuscation is trickier. Since we replace one expression with an
entirely different one, we use cursor.Replace. The new expression is
entirely made up on the spot, so it lacks position information.
This was causing problems. For example, in the added test input:
> garble -literals build
[stderr]
# test/main
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:4: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:6: misplaced compiler directive
dgcm4t6w.go:7: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:9: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:3: too many errors
The build errors are because we'd move the compiler directives, which
makes the compiler unhappy as they must be directly followed by a
function declaration.
The root cause there seems to be that, since the replacement nodes lack
position information, go/printer would try to estimate its printing
position by adding to the last known position. Since -literals adds
code, this would result in the printer position increasing rapidly, and
potentially printing directive comments earlier than needed.
For now, making the replacement nodes have the same position as the
original node seems to stop go/printer from making this mistake.
It's possible that this workaround won't be bulletproof forever, but it
works well for now, and I don't see a simpler workaround right now.
It would be possible to use fancier mechanisms like go/ast.CommentMap or
dave/dst, but those are a significant amount of added complexity as well.
Fixes #285.
4 years ago
|
|
|
cursor.Replace(withPos(obfuscateString(value), x.Pos()))
|
|
|
|
}
|
|
|
|
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
return astutil.Apply(file, pre, post).(*ast.File)
|
|
|
|
}
|
|
|
|
|
set positions when using cursor.Replace
The regular obfuscation process simply modifies some simple nodes, such
as identifiers and strings. In those cases, we modify the nodes
in-place, meaning that their positions remain the same. This hasn't
caused any problems.
Literal obfuscation is trickier. Since we replace one expression with an
entirely different one, we use cursor.Replace. The new expression is
entirely made up on the spot, so it lacks position information.
This was causing problems. For example, in the added test input:
> garble -literals build
[stderr]
# test/main
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:4: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:6: misplaced compiler directive
dgcm4t6w.go:7: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:9: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:3: too many errors
The build errors are because we'd move the compiler directives, which
makes the compiler unhappy as they must be directly followed by a
function declaration.
The root cause there seems to be that, since the replacement nodes lack
position information, go/printer would try to estimate its printing
position by adding to the last known position. Since -literals adds
code, this would result in the printer position increasing rapidly, and
potentially printing directive comments earlier than needed.
For now, making the replacement nodes have the same position as the
original node seems to stop go/printer from making this mistake.
It's possible that this workaround won't be bulletproof forever, but it
works well for now, and I don't see a simpler workaround right now.
It would be possible to use fancier mechanisms like go/ast.CommentMap or
dave/dst, but those are a significant amount of added complexity as well.
Fixes #285.
4 years ago
|
|
|
// withPos sets any token.Pos fields under node which affect printing to pos.
|
|
|
|
// Note that we can't set all token.Pos fields, since some affect the semantics.
|
|
|
|
//
|
|
|
|
// This function is useful so that go/printer doesn't try to estimate position
|
|
|
|
// offsets, which can end up in printing comment directives too early.
|
|
|
|
//
|
|
|
|
// We don't set any "end" or middle positions, because they seem irrelevant.
|
|
|
|
func withPos(node ast.Node, pos token.Pos) ast.Node {
|
|
|
|
ast.Inspect(node, func(node ast.Node) bool {
|
|
|
|
switch node := node.(type) {
|
|
|
|
case *ast.BasicLit:
|
|
|
|
node.ValuePos = pos
|
|
|
|
case *ast.Ident:
|
|
|
|
node.NamePos = pos
|
|
|
|
case *ast.CompositeLit:
|
|
|
|
node.Lbrace = pos
|
make -literals succeed on all of std
Two bugs were remaining which made the build with -literals of std fail.
First, we were ignoring too many objects in constant expressions,
including type names. This resulted in type names declared in
dependencies which were incorrectly not obfuscated in the current
package:
# go/constant
O1ku7TCe.go:1: undefined: alzLJ5Fd.Word
b0ieEGVQ.go:1: undefined: alzLJ5Fd.Word
LEpgYKdb.go:4: undefined: alzLJ5Fd.Word
FkhHJCfm.go:1: undefined: alzLJ5Fd.Word
This edge case is easy to reproduce, so a test case is added to
literals.txt.
The second issue is trickier; in some packages like os/user, we would
get syntax errors because of comments printed out of place:
../tip/os/user/getgrouplist_unix.go:35:130: syntax error: unexpected newline, expecting comma or )
This is a similar kind of error that we tried to fix with e2f06cce94. In
particular, it's fixed by also setting CallExpr.Rparen in withPos. We
also add many other missing Pos fields for good measure, even though
we're not sure they help just yet.
Unfortunately, all my attempts to minimize this into a reproducible
failure have failed. We can't just copy the failing file from os/user,
as it only builds on some OSs. It seems like it was the perfect mix of
cgo (which adds line directive comments) plus unlucky positioning of
literals.
For that last reason, as well as for ensuring that -literals works well
with a wide variety of software, we add a build of all of std with
-literals when not testing with -short. This is akin to what we do in
goprivate.txt, but with the -literals flag. This does make "go test"
more expensive, but also more thorough.
Fixes #285, hopefully for good this time.
4 years ago
|
|
|
node.Rbrace = pos
|
set positions when using cursor.Replace
The regular obfuscation process simply modifies some simple nodes, such
as identifiers and strings. In those cases, we modify the nodes
in-place, meaning that their positions remain the same. This hasn't
caused any problems.
Literal obfuscation is trickier. Since we replace one expression with an
entirely different one, we use cursor.Replace. The new expression is
entirely made up on the spot, so it lacks position information.
This was causing problems. For example, in the added test input:
> garble -literals build
[stderr]
# test/main
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:4: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:6: misplaced compiler directive
dgcm4t6w.go:7: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:9: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:3: too many errors
The build errors are because we'd move the compiler directives, which
makes the compiler unhappy as they must be directly followed by a
function declaration.
The root cause there seems to be that, since the replacement nodes lack
position information, go/printer would try to estimate its printing
position by adding to the last known position. Since -literals adds
code, this would result in the printer position increasing rapidly, and
potentially printing directive comments earlier than needed.
For now, making the replacement nodes have the same position as the
original node seems to stop go/printer from making this mistake.
It's possible that this workaround won't be bulletproof forever, but it
works well for now, and I don't see a simpler workaround right now.
It would be possible to use fancier mechanisms like go/ast.CommentMap or
dave/dst, but those are a significant amount of added complexity as well.
Fixes #285.
4 years ago
|
|
|
case *ast.ArrayType:
|
|
|
|
node.Lbrack = pos
|
|
|
|
case *ast.FuncType:
|
|
|
|
node.Func = pos
|
make -literals succeed on all of std
Two bugs were remaining which made the build with -literals of std fail.
First, we were ignoring too many objects in constant expressions,
including type names. This resulted in type names declared in
dependencies which were incorrectly not obfuscated in the current
package:
# go/constant
O1ku7TCe.go:1: undefined: alzLJ5Fd.Word
b0ieEGVQ.go:1: undefined: alzLJ5Fd.Word
LEpgYKdb.go:4: undefined: alzLJ5Fd.Word
FkhHJCfm.go:1: undefined: alzLJ5Fd.Word
This edge case is easy to reproduce, so a test case is added to
literals.txt.
The second issue is trickier; in some packages like os/user, we would
get syntax errors because of comments printed out of place:
../tip/os/user/getgrouplist_unix.go:35:130: syntax error: unexpected newline, expecting comma or )
This is a similar kind of error that we tried to fix with e2f06cce94. In
particular, it's fixed by also setting CallExpr.Rparen in withPos. We
also add many other missing Pos fields for good measure, even though
we're not sure they help just yet.
Unfortunately, all my attempts to minimize this into a reproducible
failure have failed. We can't just copy the failing file from os/user,
as it only builds on some OSs. It seems like it was the perfect mix of
cgo (which adds line directive comments) plus unlucky positioning of
literals.
For that last reason, as well as for ensuring that -literals works well
with a wide variety of software, we add a build of all of std with
-literals when not testing with -short. This is akin to what we do in
goprivate.txt, but with the -literals flag. This does make "go test"
more expensive, but also more thorough.
Fixes #285, hopefully for good this time.
4 years ago
|
|
|
case *ast.BinaryExpr:
|
|
|
|
node.OpPos = pos
|
|
|
|
case *ast.StarExpr:
|
|
|
|
node.Star = pos
|
|
|
|
case *ast.CallExpr:
|
|
|
|
node.Lparen = pos
|
|
|
|
node.Rparen = pos
|
|
|
|
|
set positions when using cursor.Replace
The regular obfuscation process simply modifies some simple nodes, such
as identifiers and strings. In those cases, we modify the nodes
in-place, meaning that their positions remain the same. This hasn't
caused any problems.
Literal obfuscation is trickier. Since we replace one expression with an
entirely different one, we use cursor.Replace. The new expression is
entirely made up on the spot, so it lacks position information.
This was causing problems. For example, in the added test input:
> garble -literals build
[stderr]
# test/main
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:4: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:6: misplaced compiler directive
dgcm4t6w.go:7: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:9: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:3: too many errors
The build errors are because we'd move the compiler directives, which
makes the compiler unhappy as they must be directly followed by a
function declaration.
The root cause there seems to be that, since the replacement nodes lack
position information, go/printer would try to estimate its printing
position by adding to the last known position. Since -literals adds
code, this would result in the printer position increasing rapidly, and
potentially printing directive comments earlier than needed.
For now, making the replacement nodes have the same position as the
original node seems to stop go/printer from making this mistake.
It's possible that this workaround won't be bulletproof forever, but it
works well for now, and I don't see a simpler workaround right now.
It would be possible to use fancier mechanisms like go/ast.CommentMap or
dave/dst, but those are a significant amount of added complexity as well.
Fixes #285.
4 years ago
|
|
|
case *ast.GenDecl:
|
|
|
|
node.TokPos = pos
|
|
|
|
case *ast.ReturnStmt:
|
|
|
|
node.Return = pos
|
|
|
|
case *ast.ForStmt:
|
|
|
|
node.For = pos
|
|
|
|
case *ast.RangeStmt:
|
|
|
|
node.For = pos
|
make -literals succeed on all of std
Two bugs were remaining which made the build with -literals of std fail.
First, we were ignoring too many objects in constant expressions,
including type names. This resulted in type names declared in
dependencies which were incorrectly not obfuscated in the current
package:
# go/constant
O1ku7TCe.go:1: undefined: alzLJ5Fd.Word
b0ieEGVQ.go:1: undefined: alzLJ5Fd.Word
LEpgYKdb.go:4: undefined: alzLJ5Fd.Word
FkhHJCfm.go:1: undefined: alzLJ5Fd.Word
This edge case is easy to reproduce, so a test case is added to
literals.txt.
The second issue is trickier; in some packages like os/user, we would
get syntax errors because of comments printed out of place:
../tip/os/user/getgrouplist_unix.go:35:130: syntax error: unexpected newline, expecting comma or )
This is a similar kind of error that we tried to fix with e2f06cce94. In
particular, it's fixed by also setting CallExpr.Rparen in withPos. We
also add many other missing Pos fields for good measure, even though
we're not sure they help just yet.
Unfortunately, all my attempts to minimize this into a reproducible
failure have failed. We can't just copy the failing file from os/user,
as it only builds on some OSs. It seems like it was the perfect mix of
cgo (which adds line directive comments) plus unlucky positioning of
literals.
For that last reason, as well as for ensuring that -literals works well
with a wide variety of software, we add a build of all of std with
-literals when not testing with -short. This is akin to what we do in
goprivate.txt, but with the -literals flag. This does make "go test"
more expensive, but also more thorough.
Fixes #285, hopefully for good this time.
4 years ago
|
|
|
case *ast.BranchStmt:
|
|
|
|
node.TokPos = pos
|
set positions when using cursor.Replace
The regular obfuscation process simply modifies some simple nodes, such
as identifiers and strings. In those cases, we modify the nodes
in-place, meaning that their positions remain the same. This hasn't
caused any problems.
Literal obfuscation is trickier. Since we replace one expression with an
entirely different one, we use cursor.Replace. The new expression is
entirely made up on the spot, so it lacks position information.
This was causing problems. For example, in the added test input:
> garble -literals build
[stderr]
# test/main
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:4: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:6: misplaced compiler directive
dgcm4t6w.go:7: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:9: misplaced compiler directive
dgcm4t6w.go:3: misplaced compiler directive
dgcm4t6w.go:3: too many errors
The build errors are because we'd move the compiler directives, which
makes the compiler unhappy as they must be directly followed by a
function declaration.
The root cause there seems to be that, since the replacement nodes lack
position information, go/printer would try to estimate its printing
position by adding to the last known position. Since -literals adds
code, this would result in the printer position increasing rapidly, and
potentially printing directive comments earlier than needed.
For now, making the replacement nodes have the same position as the
original node seems to stop go/printer from making this mistake.
It's possible that this workaround won't be bulletproof forever, but it
works well for now, and I don't see a simpler workaround right now.
It would be possible to use fancier mechanisms like go/ast.CommentMap or
dave/dst, but those are a significant amount of added complexity as well.
Fixes #285.
4 years ago
|
|
|
}
|
|
|
|
return true
|
|
|
|
})
|
|
|
|
return node
|
|
|
|
}
|
|
|
|
|
|
|
|
func obfuscateString(data string) *ast.CallExpr {
|
|
|
|
obfuscator := randObfuscator()
|
|
|
|
block := obfuscator.obfuscate([]byte(data))
|
|
|
|
|
|
|
|
block.List = append(block.List, ah.ReturnStmt(ah.CallExpr(ast.NewIdent("string"), ast.NewIdent("data"))))
|
|
|
|
|
|
|
|
return ah.LambdaCall(ast.NewIdent("string"), block)
|
|
|
|
}
|
|
|
|
|
|
|
|
func obfuscateByteSlice(data []byte) *ast.CallExpr {
|
|
|
|
obfuscator := randObfuscator()
|
|
|
|
block := obfuscator.obfuscate(data)
|
|
|
|
block.List = append(block.List, ah.ReturnStmt(ast.NewIdent("data")))
|
|
|
|
return ah.LambdaCall(&ast.ArrayType{Elt: ast.NewIdent("byte")}, block)
|
|
|
|
}
|
|
|
|
|
|
|
|
func obfuscateByteArray(data []byte, length int64) *ast.CallExpr {
|
|
|
|
obfuscator := randObfuscator()
|
|
|
|
block := obfuscator.obfuscate(data)
|
|
|
|
|
|
|
|
arrayType := &ast.ArrayType{
|
|
|
|
Len: ah.IntLit(int(length)),
|
|
|
|
Elt: ast.NewIdent("byte"),
|
|
|
|
}
|
|
|
|
|
|
|
|
sliceToArray := []ast.Stmt{
|
|
|
|
&ast.DeclStmt{
|
|
|
|
Decl: &ast.GenDecl{
|
|
|
|
Tok: token.VAR,
|
|
|
|
Specs: []ast.Spec{&ast.ValueSpec{
|
|
|
|
Names: []*ast.Ident{ast.NewIdent("newdata")},
|
|
|
|
Type: arrayType,
|
|
|
|
}},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
&ast.RangeStmt{
|
|
|
|
Key: ast.NewIdent("i"),
|
|
|
|
Tok: token.DEFINE,
|
|
|
|
X: ast.NewIdent("newdata"),
|
|
|
|
Body: &ast.BlockStmt{List: []ast.Stmt{
|
|
|
|
&ast.AssignStmt{
|
|
|
|
Lhs: []ast.Expr{ah.IndexExpr("newdata", ast.NewIdent("i"))},
|
|
|
|
Tok: token.ASSIGN,
|
|
|
|
Rhs: []ast.Expr{ah.IndexExpr("data", ast.NewIdent("i"))},
|
|
|
|
},
|
|
|
|
}},
|
|
|
|
},
|
|
|
|
ah.ReturnStmt(ast.NewIdent("newdata")),
|
|
|
|
}
|
|
|
|
|
|
|
|
block.List = append(block.List, sliceToArray...)
|
|
|
|
|
|
|
|
return ah.LambdaCall(arrayType, block)
|
|
|
|
}
|
|
|
|
|
|
|
|
// RecordUsedAsConstants records identifieres used in constant expressions.
|
|
|
|
func RecordUsedAsConstants(node ast.Node, info *types.Info, ignoreObj map[types.Object]bool) {
|
|
|
|
visit := func(node ast.Node) bool {
|
|
|
|
ident, ok := node.(*ast.Ident)
|
|
|
|
if !ok {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
make -literals succeed on all of std
Two bugs were remaining which made the build with -literals of std fail.
First, we were ignoring too many objects in constant expressions,
including type names. This resulted in type names declared in
dependencies which were incorrectly not obfuscated in the current
package:
# go/constant
O1ku7TCe.go:1: undefined: alzLJ5Fd.Word
b0ieEGVQ.go:1: undefined: alzLJ5Fd.Word
LEpgYKdb.go:4: undefined: alzLJ5Fd.Word
FkhHJCfm.go:1: undefined: alzLJ5Fd.Word
This edge case is easy to reproduce, so a test case is added to
literals.txt.
The second issue is trickier; in some packages like os/user, we would
get syntax errors because of comments printed out of place:
../tip/os/user/getgrouplist_unix.go:35:130: syntax error: unexpected newline, expecting comma or )
This is a similar kind of error that we tried to fix with e2f06cce94. In
particular, it's fixed by also setting CallExpr.Rparen in withPos. We
also add many other missing Pos fields for good measure, even though
we're not sure they help just yet.
Unfortunately, all my attempts to minimize this into a reproducible
failure have failed. We can't just copy the failing file from os/user,
as it only builds on some OSs. It seems like it was the perfect mix of
cgo (which adds line directive comments) plus unlucky positioning of
literals.
For that last reason, as well as for ensuring that -literals works well
with a wide variety of software, we add a build of all of std with
-literals when not testing with -short. This is akin to what we do in
goprivate.txt, but with the -literals flag. This does make "go test"
more expensive, but also more thorough.
Fixes #285, hopefully for good this time.
4 years ago
|
|
|
// Only record *types.Const objects.
|
|
|
|
// Other objects, such as builtins or type names,
|
|
|
|
// must not be recorded as they would be false positives.
|
|
|
|
obj := info.ObjectOf(ident)
|
make -literals succeed on all of std
Two bugs were remaining which made the build with -literals of std fail.
First, we were ignoring too many objects in constant expressions,
including type names. This resulted in type names declared in
dependencies which were incorrectly not obfuscated in the current
package:
# go/constant
O1ku7TCe.go:1: undefined: alzLJ5Fd.Word
b0ieEGVQ.go:1: undefined: alzLJ5Fd.Word
LEpgYKdb.go:4: undefined: alzLJ5Fd.Word
FkhHJCfm.go:1: undefined: alzLJ5Fd.Word
This edge case is easy to reproduce, so a test case is added to
literals.txt.
The second issue is trickier; in some packages like os/user, we would
get syntax errors because of comments printed out of place:
../tip/os/user/getgrouplist_unix.go:35:130: syntax error: unexpected newline, expecting comma or )
This is a similar kind of error that we tried to fix with e2f06cce94. In
particular, it's fixed by also setting CallExpr.Rparen in withPos. We
also add many other missing Pos fields for good measure, even though
we're not sure they help just yet.
Unfortunately, all my attempts to minimize this into a reproducible
failure have failed. We can't just copy the failing file from os/user,
as it only builds on some OSs. It seems like it was the perfect mix of
cgo (which adds line directive comments) plus unlucky positioning of
literals.
For that last reason, as well as for ensuring that -literals works well
with a wide variety of software, we add a build of all of std with
-literals when not testing with -short. This is akin to what we do in
goprivate.txt, but with the -literals flag. This does make "go test"
more expensive, but also more thorough.
Fixes #285, hopefully for good this time.
4 years ago
|
|
|
if _, ok := obj.(*types.Const); ok {
|
|
|
|
ignoreObj[obj] = true
|
|
|
|
}
|
|
|
|
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
switch x := node.(type) {
|
|
|
|
// in a slice or array composite literal all explicit keys must be constant representable
|
|
|
|
case *ast.CompositeLit:
|
|
|
|
if _, ok := x.Type.(*ast.ArrayType); !ok {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
for _, elt := range x.Elts {
|
|
|
|
if kv, ok := elt.(*ast.KeyValueExpr); ok {
|
|
|
|
ast.Inspect(kv.Key, visit)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// in an array type the length must be a constant representable
|
|
|
|
case *ast.ArrayType:
|
|
|
|
if x.Len != nil {
|
|
|
|
ast.Inspect(x.Len, visit)
|
|
|
|
}
|
|
|
|
// in a const declaration all values must be constant representable
|
|
|
|
case *ast.GenDecl:
|
|
|
|
if x.Tok != token.CONST {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
for _, spec := range x.Specs {
|
|
|
|
spec := spec.(*ast.ValueSpec)
|
|
|
|
|
|
|
|
for _, val := range spec.Values {
|
|
|
|
ast.Inspect(val, visit)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|