-
Notifications
You must be signed in to change notification settings - Fork 937
chore: track terraform module source type in telemetry #15590
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,6 +11,7 @@ import ( | |
"net/http" | ||
"net/url" | ||
"os" | ||
"regexp" | ||
"runtime" | ||
"slices" | ||
"strings" | ||
|
@@ -680,9 +681,95 @@ func shouldSendRawModuleSource(source string) bool { | |
return strings.Contains(source, "registry.coder.com") | ||
} | ||
|
||
// ModuleSourceType is the type of source for a module.
// For reference, see https://developer.hashicorp.com/terraform/language/modules/sources
type ModuleSourceType string

// The source types GetModuleSourceType can report. ModuleSourceTypeUnknown is
// returned when none of the classification rules match.
const (
	ModuleSourceTypeLocal           ModuleSourceType = "local"
	ModuleSourceTypeLocalAbs        ModuleSourceType = "local_absolute"
	ModuleSourceTypePublicRegistry  ModuleSourceType = "public_registry"
	ModuleSourceTypePrivateRegistry ModuleSourceType = "private_registry"
	ModuleSourceTypeCoderRegistry   ModuleSourceType = "coder_registry"
	ModuleSourceTypeGitHub          ModuleSourceType = "github"
	ModuleSourceTypeBitbucket       ModuleSourceType = "bitbucket"
	ModuleSourceTypeGit             ModuleSourceType = "git"
	ModuleSourceTypeMercurial       ModuleSourceType = "mercurial"
	ModuleSourceTypeHTTP            ModuleSourceType = "http"
	ModuleSourceTypeS3              ModuleSourceType = "s3"
	ModuleSourceTypeGCS             ModuleSourceType = "gcs"
	ModuleSourceTypeUnknown         ModuleSourceType = "unknown"
)

// publicRegistryModuleSource matches public registry modules in the format
// <NAMESPACE>/<NAME>/<PROVIDER>, optionally followed by a `//...` suffix,
// which signifies a subdirectory.
// The allowed characters are based on
// https://developer.hashicorp.com/terraform/cloud-docs/api-docs/private-registry/modules#request-body-1
// because Hashicorp's documentation about module sources doesn't mention it.
//
// It is compiled once at package scope so that classifying a module does not
// recompile the pattern on every call.
var publicRegistryModuleSource = regexp.MustCompile(`^[a-zA-Z0-9_-]+/[a-zA-Z0-9_-]+/[a-zA-Z0-9_-]+(//.*)?$`)

// GetModuleSourceType attempts to classify the source type of a module.
//
// Terraform supports a variety of module source types, like:
//   - local paths (./ or ../)
//   - absolute local paths (/)
//   - git URLs (git:: or git@)
//   - http URLs
//   - s3 URLs
//
// and more!
//
// See https://developer.hashicorp.com/terraform/language/modules/sources for an overview.
//
// The source is trimmed of surrounding whitespace and lowercased before it is
// matched, so classification is case-insensitive. The rules are evaluated in
// order and the first match wins; ModuleSourceTypeUnknown is returned when no
// rule matches.
//
// The classification is imperfect, as the checks that terraform actually does
// are pretty complicated.
// See e.g. https://github.com/hashicorp/go-getter/blob/842d6c379e5e70d23905b8f6b5a25a80290acb66/detect.go#L47
// if you're interested in the complexity.
func GetModuleSourceType(source string) ModuleSourceType {
	source = strings.ToLower(strings.TrimSpace(source))

	// Host-based rules below are plain substring checks rather than checks
	// against a parsed hostname, so a source that merely contains one of these
	// names somewhere in the string is attributed to that host. Misclassifying
	// a small share of modules is acceptable: telemetry is read in aggregate.
	switch {
	case strings.HasPrefix(source, "./"), strings.HasPrefix(source, "../"):
		return ModuleSourceTypeLocal
	case strings.HasPrefix(source, "/"):
		return ModuleSourceTypeLocalAbs
	case publicRegistryModuleSource.MatchString(source):
		return ModuleSourceTypePublicRegistry
	// GitHub and Bitbucket are checked before the protocol prefixes so that
	// e.g. "git::https://github.com/..." is reported as GitHub, not plain git.
	case strings.Contains(source, "github.com"):
		return ModuleSourceTypeGitHub
	case strings.Contains(source, "bitbucket.org"):
		return ModuleSourceTypeBitbucket
	case strings.HasPrefix(source, "git::"), strings.HasPrefix(source, "git@"):
		return ModuleSourceTypeGit
	case strings.HasPrefix(source, "hg::"):
		return ModuleSourceTypeMercurial
	case strings.HasPrefix(source, "http://"), strings.HasPrefix(source, "https://"):
		return ModuleSourceTypeHTTP
	case strings.HasPrefix(source, "s3::"):
		return ModuleSourceTypeS3
	case strings.HasPrefix(source, "gcs::"):
		return ModuleSourceTypeGCS
	// Registry hosts are only reached by sources without one of the protocol
	// prefixes above, e.g. "registry.coder.com/modules/base".
	case strings.Contains(source, "registry.terraform.io"):
		return ModuleSourceTypePublicRegistry
	case strings.Contains(source, "app.terraform.io"), strings.Contains(source, "localterraform.com"):
		return ModuleSourceTypePrivateRegistry
	case strings.Contains(source, "registry.coder.com"):
		return ModuleSourceTypeCoderRegistry
	default:
		return ModuleSourceTypeUnknown
	}
}
|
||
func ConvertWorkspaceModule(module database.WorkspaceModule) WorkspaceModule { | ||
source := module.Source | ||
version := module.Version | ||
sourceType := GetModuleSourceType(source) | ||
if !shouldSendRawModuleSource(source) { | ||
source = fmt.Sprintf("%x", sha256.Sum256([]byte(source))) | ||
version = fmt.Sprintf("%x", sha256.Sum256([]byte(version))) | ||
|
@@ -694,6 +781,7 @@ func ConvertWorkspaceModule(module database.WorkspaceModule) WorkspaceModule { | |
Transition: module.Transition, | ||
Source: source, | ||
Version: version, | ||
SourceType: sourceType, | ||
Key: module.Key, | ||
CreatedAt: module.CreatedAt, | ||
} | ||
|
@@ -938,6 +1026,7 @@ type WorkspaceModule struct { | |
Key string `json:"key"` | ||
Version string `json:"version"` | ||
Source string `json:"source"` | ||
SourceType ModuleSourceType `json:"source_type"` | ||
} | ||
|
||
type WorkspaceAgent struct { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -133,7 +133,7 @@ func TestTelemetry(t *testing.T) { | |
}) | ||
_ = dbgen.WorkspaceModule(t, db, database.WorkspaceModule{ | ||
JobID: pj.ID, | ||
Source: "internal-url.com/some-module", | ||
Source: "https://internal-url.com/some-module", | ||
Version: "1.0.0", | ||
}) | ||
_, snapshot := collectSnapshot(t, db, nil) | ||
|
@@ -142,10 +142,89 @@ func TestTelemetry(t *testing.T) { | |
sort.Slice(modules, func(i, j int) bool { | ||
return modules[i].Source < modules[j].Source | ||
}) | ||
require.Equal(t, modules[0].Source, "921c61d6f3eef5118f3cae658d1518b378c5b02a4955a766c791440894d989c5") | ||
require.Equal(t, modules[0].Source, "ed662ec0396db67e77119f14afcb9253574cc925b04a51d4374bcb1eae299f5d") | ||
require.Equal(t, modules[0].Version, "92521fc3cbd964bdc9f584a991b89fddaa5754ed1cc96d6d42445338669c1305") | ||
require.Equal(t, modules[0].SourceType, telemetry.ModuleSourceTypeHTTP) | ||
require.Equal(t, modules[1].Source, "registry.coder.com/terraform/aws") | ||
require.Equal(t, modules[1].Version, "1.0.0") | ||
require.Equal(t, modules[1].SourceType, telemetry.ModuleSourceTypeCoderRegistry) | ||
}) | ||
t.Run("ModuleSourceType", func(t *testing.T) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. praise: nice test coverage! |
||
t.Parallel() | ||
cases := []struct { | ||
source string | ||
want telemetry.ModuleSourceType | ||
}{ | ||
// Local relative paths | ||
{source: "./modules/terraform-aws-vpc", want: telemetry.ModuleSourceTypeLocal}, | ||
{source: "../shared/modules/vpc", want: telemetry.ModuleSourceTypeLocal}, | ||
{source: " ./my-module ", want: telemetry.ModuleSourceTypeLocal}, // with whitespace | ||
|
||
// Local absolute paths | ||
{source: "/opt/terraform/modules/vpc", want: telemetry.ModuleSourceTypeLocalAbs}, | ||
{source: "/Users/dev/modules/app", want: telemetry.ModuleSourceTypeLocalAbs}, | ||
{source: "/etc/terraform/modules/network", want: telemetry.ModuleSourceTypeLocalAbs}, | ||
|
||
// Public registry | ||
{source: "hashicorp/consul/aws", want: telemetry.ModuleSourceTypePublicRegistry}, | ||
{source: "registry.terraform.io/hashicorp/aws", want: telemetry.ModuleSourceTypePublicRegistry}, | ||
{source: "terraform-aws-modules/vpc/aws", want: telemetry.ModuleSourceTypePublicRegistry}, | ||
{source: "hashicorp/consul/aws//modules/consul-cluster", want: telemetry.ModuleSourceTypePublicRegistry}, | ||
{source: "hashicorp/co-nsul/aw_s//modules/consul-cluster", want: telemetry.ModuleSourceTypePublicRegistry}, | ||
|
||
// Private registry | ||
{source: "app.terraform.io/company/vpc/aws", want: telemetry.ModuleSourceTypePrivateRegistry}, | ||
{source: "localterraform.com/org/module", want: telemetry.ModuleSourceTypePrivateRegistry}, | ||
{source: "APP.TERRAFORM.IO/test/module", want: telemetry.ModuleSourceTypePrivateRegistry}, // case insensitive | ||
|
||
// Coder registry | ||
{source: "registry.coder.com/terraform/aws", want: telemetry.ModuleSourceTypeCoderRegistry}, | ||
{source: "registry.coder.com/modules/base", want: telemetry.ModuleSourceTypeCoderRegistry}, | ||
{source: "REGISTRY.CODER.COM/test/module", want: telemetry.ModuleSourceTypeCoderRegistry}, // case insensitive | ||
|
||
// GitHub | ||
{source: "github.com/hashicorp/terraform-aws-vpc", want: telemetry.ModuleSourceTypeGitHub}, | ||
{source: "git::https://github.com/org/repo.git", want: telemetry.ModuleSourceTypeGitHub}, | ||
{source: "git::https://github.com/org/repo//modules/vpc", want: telemetry.ModuleSourceTypeGitHub}, | ||
|
||
// Bitbucket | ||
{source: "bitbucket.org/hashicorp/terraform-aws-vpc", want: telemetry.ModuleSourceTypeBitbucket}, | ||
{source: "git::https://bitbucket.org/org/repo.git", want: telemetry.ModuleSourceTypeBitbucket}, | ||
{source: "https://bitbucket.org/org/repo//modules/vpc", want: telemetry.ModuleSourceTypeBitbucket}, | ||
|
||
// Generic Git | ||
{source: "git::ssh://git.internal.com/repo.git", want: telemetry.ModuleSourceTypeGit}, | ||
{source: "git@gitlab.com:org/repo.git", want: telemetry.ModuleSourceTypeGit}, | ||
{source: "git::https://git.internal.com/repo.git?ref=v1.0.0", want: telemetry.ModuleSourceTypeGit}, | ||
|
||
// Mercurial | ||
{source: "hg::https://example.com/vpc.hg", want: telemetry.ModuleSourceTypeMercurial}, | ||
{source: "hg::http://example.com/vpc.hg", want: telemetry.ModuleSourceTypeMercurial}, | ||
{source: "hg::ssh://example.com/vpc.hg", want: telemetry.ModuleSourceTypeMercurial}, | ||
|
||
// HTTP | ||
{source: "https://example.com/vpc-module.zip", want: telemetry.ModuleSourceTypeHTTP}, | ||
{source: "http://example.com/modules/vpc", want: telemetry.ModuleSourceTypeHTTP}, | ||
{source: "https://internal.network/terraform/modules", want: telemetry.ModuleSourceTypeHTTP}, | ||
|
||
// S3 | ||
{source: "s3::https://s3-eu-west-1.amazonaws.com/bucket/vpc", want: telemetry.ModuleSourceTypeS3}, | ||
{source: "s3::https://bucket.s3.amazonaws.com/vpc", want: telemetry.ModuleSourceTypeS3}, | ||
{source: "s3::http://bucket.s3.amazonaws.com/vpc?version=1", want: telemetry.ModuleSourceTypeS3}, | ||
|
||
// GCS | ||
{source: "gcs::https://www.googleapis.com/storage/v1/bucket/vpc", want: telemetry.ModuleSourceTypeGCS}, | ||
{source: "gcs::https://storage.googleapis.com/bucket/vpc", want: telemetry.ModuleSourceTypeGCS}, | ||
{source: "gcs::https://bucket.storage.googleapis.com/vpc", want: telemetry.ModuleSourceTypeGCS}, | ||
|
||
// Unknown | ||
{source: "custom://example.com/vpc", want: telemetry.ModuleSourceTypeUnknown}, | ||
{source: "something-random", want: telemetry.ModuleSourceTypeUnknown}, | ||
{source: "", want: telemetry.ModuleSourceTypeUnknown}, | ||
} | ||
for _, c := range cases { | ||
require.Equal(t, c.want, telemetry.GetModuleSourceType(c.source)) | ||
} | ||
}) | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It might be good for future readers to add a doc comment to this function along with a link to https://developer.hashicorp.com/terraform/language/modules/sources, as they may have some similar questions to me before reading: