From c9b0b07853c7ff7d22091f50eb38c058ade46e4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Mart=C3=AD?= Date: Sun, 25 Apr 2021 16:36:37 +0100 Subject: [PATCH] hash field names equally in all packages Packages P1 and P2 can define identical struct types T1 and T2, and one can convert from type T1 to T2 or vice versa. The spec defines two identical struct types as: Two struct types are identical if they have the same sequence of fields, and if corresponding fields have the same names, and identical types, and identical tags. Non-exported field names from different packages are always different. Unfortunately, garble broke this: since we obfuscated field names differently depending on the package, cross-package conversions like the case above would result in typechecking errors. To fix this, implement Joe Tsai's idea: hash struct field names with the string representation of the entire struct. This way, identical struct types will have their field names obfuscated in the same way in all packages across a build. Note that we had to refactor "reverse" a bit to start using transformer, since now it needs to keep track of struct types as well. This failure was affecting the build of google.golang.org/protobuf, since it makes regular use of cross-package struct conversions. Note that the protobuf module still fails to build, but for other reasons. The package that used to fail now succeeds, so the build gets a bit further than before. #240 tracks adding relevant third-party Go modules to CI, so we'll track the other remaining failures there. Fixes #310. --- hash.go | 10 +-- main.go | 117 ++++++++++++++++++++++++++++----- reverse.go | 39 ++++++++--- shared.go | 2 +- testdata/scripts/implement.txt | 57 +++++++++++++++- 5 files changed, 195 insertions(+), 30 deletions(-) diff --git a/hash.go b/hash.go index 8aeaad3..289a95b 100644 --- a/hash.go +++ b/hash.go @@ -60,7 +60,7 @@ func alterToolVersion(tool string, args []string) error { toolID = []byte(line) } - contentID := addGarbleToBuildIDComponent(toolID) + contentID := addGarbleToHash(toolID) // The part of the build ID that matters is the last, since it's the // "content ID" which is used to work out whether there is a need to redo // the action (build) or not. Since cmd/go parses the last word in the @@ -72,13 +72,13 @@ func alterToolVersion(tool string, args []string) error { return nil } -// addGarbleToBuildIDComponent takes a build ID component hash, such as an -// action ID or a content ID, and returns a new hash which also contains -// garble's own deterministic inputs. +// addGarbleToHash takes some arbitrary input bytes, +// typically a hash such as an action ID or a content ID, +// and returns a new hash which also contains garble's own deterministic inputs. // // This includes garble's own version, obtained via its own binary's content ID, // as well as any other options which affect a build, such as GOPRIVATE and -tiny. -func addGarbleToBuildIDComponent(inputHash []byte) []byte { +func addGarbleToHash(inputHash []byte) []byte { // Join the two content IDs together into a single base64-encoded sha256 // sum. This includes the original tool's content ID, and garble's own // content ID. diff --git a/main.go b/main.go index a6eba5d..e3b4b89 100644 --- a/main.go +++ b/main.go @@ -595,7 +595,6 @@ func transformCompile(args []string) ([]string, error) { var files []*ast.File for _, path := range paths { - // Note that we want file, err := parser.ParseFile(fset, path, nil, parser.ParseComments) if err != nil { return nil, err @@ -611,18 +610,9 @@ func transformCompile(args []string) ([]string, error) { // log.Printf("seeding math/rand with %x\n", randSeed) mathrand.Seed(int64(binary.BigEndian.Uint64(randSeed))) - tf := &transformer{ - info: &types.Info{ - Types: make(map[ast.Expr]types.TypeAndValue), - Defs: make(map[*ast.Ident]types.Object), - Uses: make(map[*ast.Ident]types.Object), - }, - } - - origTypesConfig := types.Config{Importer: origImporter} - tf.pkg, err = origTypesConfig.Check(curPkg.ImportPath, fset, files, tf.info) - if err != nil { - return nil, fmt.Errorf("typecheck error: %v", err) + tf := newTransformer() + if err := tf.typecheck(files); err != nil { + return nil, err } tf.recordReflectArgs(files) @@ -1028,6 +1018,78 @@ type transformer struct { // * Types or variables from external packages which were not // obfuscated, for caching reasons; see transformGo. ignoreObjects map[types.Object]bool + + // These fields are used to locate struct types from any of their field + // objects. Useful when obfuscating field names. + fieldToStruct map[*types.Var]*types.Struct + recordTypeDone map[types.Type]bool +} + +// newTransformer helps initialize some maps. +func newTransformer() *transformer { + return &transformer{ + info: &types.Info{ + Types: make(map[ast.Expr]types.TypeAndValue), + Defs: make(map[*ast.Ident]types.Object), + Uses: make(map[*ast.Ident]types.Object), + }, + recordTypeDone: make(map[types.Type]bool), + fieldToStruct: make(map[*types.Var]*types.Struct), + } +} + +func (tf *transformer) typecheck(files []*ast.File) error { + origTypesConfig := types.Config{Importer: origImporter} + pkg, err := origTypesConfig.Check(curPkg.ImportPath, fset, files, tf.info) + if err != nil { + return fmt.Errorf("typecheck error: %v", err) + } + tf.pkg = pkg + + // Run recordType on all types reachable via types.Info. + // A bit hacky, but I could not find an easier way to do this. + for _, obj := range tf.info.Defs { + if obj != nil { + tf.recordType(obj.Type()) + } + } + for _, obj := range tf.info.Uses { + if obj != nil { + tf.recordType(obj.Type()) + } + } + for _, tv := range tf.info.Types { + tf.recordType(tv.Type) + } + return nil +} + +// recordType visits every reachable type after typechecking a package. +// Right now, all it does is fill the fieldToStruct field. +// Since types can be recursive, we need a map to avoid cycles. +func (tf *transformer) recordType(t types.Type) { + if tf.recordTypeDone[t] { + return + } + tf.recordTypeDone[t] = true + switch t := t.(type) { + case interface{ Elem() types.Type }: + tf.recordType(t.Elem()) + case *types.Named: + tf.recordType(t.Underlying()) + } + strct, _ := t.(*types.Struct) + if strct == nil { + return + } + for i := 0; i < strct.NumFields(); i++ { + field := strct.Field(i) + tf.fieldToStruct[field] = strct + + if field.Embedded() { + tf.recordType(field.Type()) + } + } } // transformGo obfuscates the provided Go syntax file. @@ -1085,6 +1147,7 @@ func (tf *transformer) transformGo(file *ast.File) *ast.File { if !lpkg.Private { return true // only private packages are transformed } + hashToUse := lpkg.GarbleActionID // log.Printf("%#v %T", node, obj) parentScope := obj.Parent() @@ -1095,6 +1158,30 @@ func (tf *transformer) transformGo(file *ast.File) *ast.File { return true } + // Fields don't get hashed with the package's action ID. + // They get hashed with the type of their parent struct. + // This is because one struct can be converted to another, + // as long as the underlying types are identical, + // even if the structs are defined in different packages. + // + // TODO: Consider only doing this for structs where all + // fields are exported. We only need this special case + // for cross-package conversions, which can't work if + // any field is unexported. If that is done, add a test + // that ensures unexported fields from different + // packages result in different obfuscated names. + if obj.IsField() { + strct := tf.fieldToStruct[obj] + if strct == nil { + panic("could not find for " + node.Name) + } + // TODO: We should probably strip field tags here. + // Do we need to do anything else to make a + // struct type "canonical"? + fieldsHash := []byte(strct.String()) + hashToUse = addGarbleToHash(fieldsHash) + } + // If the struct of this field was not obfuscated, do not obfuscate // any of that struct's fields. if parentScope != tf.pkg.Scope() && obj.IsField() && !obj.Embedded() { @@ -1172,8 +1259,8 @@ func (tf *transformer) transformGo(file *ast.File) *ast.File { origName := node.Name _ = origName // used for debug prints below - node.Name = hashWith(lpkg.GarbleActionID, node.Name) - // log.Printf("%q hashed with %x to %q", origName, lpkg.GarbleActionID, node.Name) + node.Name = hashWith(hashToUse, node.Name) + // log.Printf("%q hashed with %x to %q", origName, hashToUse, node.Name) return true } post := func(cursor *astutil.Cursor) bool { diff --git a/reverse.go b/reverse.go index 29504f8..90fc740 100644 --- a/reverse.go +++ b/reverse.go @@ -9,7 +9,7 @@ import ( "fmt" "go/ast" "go/parser" - "go/token" + "go/types" "io" "os" "path/filepath" @@ -53,37 +53,60 @@ func commandReverse(args []string) error { // export data only exposes exported names. Parsing Go files is cheap, // so it's unnecessary to try to avoid this cost. var replaces []string - fset := token.NewFileSet() for _, lpkg := range cache.ListedPackages { if !lpkg.Private { continue } - addReplace := func(str string) { - replaces = append(replaces, hashWith(lpkg.GarbleActionID, str), str) + curPkg = lpkg + + addReplace := func(hash []byte, str string) { + if hash == nil { + hash = lpkg.GarbleActionID + } + replaces = append(replaces, hashWith(hash, str), str) } // Package paths are obfuscated, too. - addReplace(lpkg.ImportPath) + addReplace(nil, lpkg.ImportPath) + var files []*ast.File for _, goFile := range lpkg.GoFiles { fullGoFile := filepath.Join(lpkg.Dir, goFile) file, err := parser.ParseFile(fset, fullGoFile, nil, 0) if err != nil { return err } + files = append(files, file) + } + tf := newTransformer() + if err := tf.typecheck(files); err != nil { + return err + } + for i, file := range files { + goFile := lpkg.GoFiles[i] ast.Inspect(file, func(node ast.Node) bool { switch node := node.(type) { // Replace names. // TODO: do var names ever show up in output? case *ast.FuncDecl: - addReplace(node.Name.Name) + addReplace(nil, node.Name.Name) case *ast.TypeSpec: - addReplace(node.Name.Name) + addReplace(nil, node.Name.Name) case *ast.Field: for _, name := range node.Names { - addReplace(name.Name) + obj, _ := tf.info.ObjectOf(name).(*types.Var) + if obj == nil || !obj.IsField() { + continue + } + strct := tf.fieldToStruct[obj] + if strct == nil { + panic("could not find for " + name.Name) + } + fieldsHash := []byte(strct.String()) + hashToUse := addGarbleToHash(fieldsHash) + addReplace(hashToUse, name.Name) } case *ast.CallExpr: diff --git a/shared.go b/shared.go index 1f97273..ed082b1 100644 --- a/shared.go +++ b/shared.go @@ -223,7 +223,7 @@ func setListedPackages(patterns []string) error { } if pkg.Export != "" { actionID := decodeHash(splitActionID(pkg.BuildID)) - pkg.GarbleActionID = addGarbleToBuildIDComponent(actionID) + pkg.GarbleActionID = addGarbleToHash(actionID) } cache.ListedPackages[pkg.ImportPath] = &pkg } diff --git a/testdata/scripts/implement.txt b/testdata/scripts/implement.txt index 06ce3f8..af1e95d 100644 --- a/testdata/scripts/implement.txt +++ b/testdata/scripts/implement.txt @@ -6,6 +6,13 @@ cmp stderr main.stderr ! binsubstr main$exe 'unexportedMethod' 'privateIface' +[short] stop # no need to verify this with -short + +# Check that the program works as expected without garble. +go build +exec ./main +cmp stderr main.stderr + -- go.mod -- module test/main @@ -13,7 +20,12 @@ go 1.16 -- main.go -- package main -import "test/main/tinyfmt" +import ( + "test/main/lib1" + "test/main/lib2" + "test/main/lib3" + "test/main/tinyfmt" +) type T string @@ -33,10 +45,53 @@ func (T) privateIface() {} var _ privateInterface = T("") +type StructUnnamed = struct { + Foo int + Bar struct { + Nested *[]string + } + lib3.StructEmbed +} + +var _ = lib1.Struct1(lib2.Struct2{}) + +var _ = StructUnnamed(lib2.Struct2{}) + func main() { tinyfmt.Println(T("foo")) tinyfmt.Println(T("foo").unexportedMethod()) } +-- lib1/lib1.go -- +package lib1 + +import "test/main/lib3" + +type Struct1 struct { + Foo int + Bar struct { + Nested *[]string + } + lib3.StructEmbed +} + +-- lib2/lib2.go -- +package lib2 + +import "test/main/lib3" + +type Struct2 struct { + Foo int + Bar struct { + Nested *[]string + } + lib3.StructEmbed +} +-- lib3/lib3.go -- +package lib3 + +type StructEmbed struct { + Baz interface{} +} -- tinyfmt/fmt.go -- package tinyfmt