Skip to content

Commit c7917ea

Browse files
authored
chore: expose original length when serving slim binaries (#17735)
This will be used in the extensions and desktop apps to enable compression AND progress reporting for the download by comparing the original content length to the amount of bytes written to disk. Closes #16340
1 parent 90e93a2 commit c7917ea

File tree

2 files changed

+173
-79
lines changed

2 files changed

+173
-79
lines changed

site/site.go

Lines changed: 104 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -108,10 +108,34 @@ func New(opts *Options) *Handler {
108108
panic(fmt.Sprintf("Failed to parse html files: %v", err))
109109
}
110110

111-
binHashCache := newBinHashCache(opts.BinFS, opts.BinHashes)
112-
113111
mux := http.NewServeMux()
114-
mux.Handle("/bin/", http.StripPrefix("/bin", http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
112+
mux.Handle("/bin/", binHandler(opts.BinFS, newBinMetadataCache(opts.BinFS, opts.BinHashes)))
113+
mux.Handle("/", http.FileServer(
114+
http.FS(
115+
// OnlyFiles is a wrapper around the file system that prevents directory
116+
// listings. Directory listings are not required for the site file system, so we
117+
// exclude it as a security measure. In practice, this file system comes from our
118+
// open source code base, but this is considered a best practice for serving
119+
// static files.
120+
OnlyFiles(opts.SiteFS))),
121+
)
122+
buildInfoResponse, err := json.Marshal(opts.BuildInfo)
123+
if err != nil {
124+
panic("failed to marshal build info: " + err.Error())
125+
}
126+
handler.buildInfoJSON = html.EscapeString(string(buildInfoResponse))
127+
handler.handler = mux.ServeHTTP
128+
129+
handler.installScript, err = parseInstallScript(opts.SiteFS, opts.BuildInfo)
130+
if err != nil {
131+
opts.Logger.Warn(context.Background(), "could not parse install.sh, it will be unavailable", slog.Error(err))
132+
}
133+
134+
return handler
135+
}
136+
137+
func binHandler(binFS http.FileSystem, binMetadataCache *binMetadataCache) http.Handler {
138+
return http.StripPrefix("/bin", http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
115139
// Convert underscores in the filename to hyphens. We eventually want to
116140
// change our hyphen-based filenames to underscores, but we need to
117141
// support both for now.
@@ -122,7 +146,7 @@ func New(opts *Options) *Handler {
122146
if name == "" || name == "/" {
123147
// Serve the directory listing. This intentionally allows directory listings to
124148
// be served. This file system should not contain anything sensitive.
125-
http.FileServer(opts.BinFS).ServeHTTP(rw, r)
149+
http.FileServer(binFS).ServeHTTP(rw, r)
126150
return
127151
}
128152
if strings.Contains(name, "/") {
@@ -131,7 +155,8 @@ func New(opts *Options) *Handler {
131155
http.NotFound(rw, r)
132156
return
133157
}
134-
hash, err := binHashCache.getHash(name)
158+
159+
metadata, err := binMetadataCache.getMetadata(name)
135160
if xerrors.Is(err, os.ErrNotExist) {
136161
http.NotFound(rw, r)
137162
return
@@ -141,35 +166,26 @@ func New(opts *Options) *Handler {
141166
return
142167
}
143168

144-
// ETag header needs to be quoted.
145-
rw.Header().Set("ETag", fmt.Sprintf(`%q`, hash))
169+
// http.FileServer will not set Content-Length when performing chunked
170+
// transport encoding, which is used for large files like our binaries
171+
// so stream compression can be used.
172+
//
173+
// Clients like IDE extensions and the desktop apps can compare the
174+
// value of this header with the amount of bytes written to disk after
175+
// decompression to show progress. Without this, they cannot show
176+
// progress without disabling compression.
177+
//
178+
// There isn't really a spec for a length header for the "inner" content
179+
// size, but some nginx modules use this header.
180+
rw.Header().Set("X-Original-Content-Length", fmt.Sprintf("%d", metadata.sizeBytes))
181+
182+
// Get and set ETag header. Must be quoted.
183+
rw.Header().Set("ETag", fmt.Sprintf(`%q`, metadata.sha1Hash))
146184

147185
// http.FileServer will see the ETag header and automatically handle
148186
// If-Match and If-None-Match headers on the request properly.
149-
http.FileServer(opts.BinFS).ServeHTTP(rw, r)
150-
})))
151-
mux.Handle("/", http.FileServer(
152-
http.FS(
153-
// OnlyFiles is a wrapper around the file system that prevents directory
154-
// listings. Directory listings are not required for the site file system, so we
155-
// exclude it as a security measure. In practice, this file system comes from our
156-
// open source code base, but this is considered a best practice for serving
157-
// static files.
158-
OnlyFiles(opts.SiteFS))),
159-
)
160-
buildInfoResponse, err := json.Marshal(opts.BuildInfo)
161-
if err != nil {
162-
panic("failed to marshal build info: " + err.Error())
163-
}
164-
handler.buildInfoJSON = html.EscapeString(string(buildInfoResponse))
165-
handler.handler = mux.ServeHTTP
166-
167-
handler.installScript, err = parseInstallScript(opts.SiteFS, opts.BuildInfo)
168-
if err != nil {
169-
opts.Logger.Warn(context.Background(), "could not parse install.sh, it will be unavailable", slog.Error(err))
170-
}
171-
172-
return handler
187+
http.FileServer(binFS).ServeHTTP(rw, r)
188+
}))
173189
}
174190

175191
type Handler struct {
@@ -217,7 +233,7 @@ func (h *Handler) ServeHTTP(rw http.ResponseWriter, r *http.Request) {
217233
h.handler.ServeHTTP(rw, r)
218234
return
219235
// If requesting assets, serve straight up with caching.
220-
case reqFile == "assets" || strings.HasPrefix(reqFile, "assets/"):
236+
case reqFile == "assets" || strings.HasPrefix(reqFile, "assets/") || strings.HasPrefix(reqFile, "icon/"):
221237
// It could make sense to cache 404s, but the problem is that during an
222238
// upgrade a load balancer may route partially to the old server, and that
223239
// would make new asset paths get cached as 404s and not load even once the
@@ -952,68 +968,95 @@ func RenderStaticErrorPage(rw http.ResponseWriter, r *http.Request, data ErrorPa
952968
}
953969
}
954970

955-
type binHashCache struct {
956-
binFS http.FileSystem
971+
type binMetadata struct {
972+
sizeBytes int64 // -1 if not known yet
973+
// SHA1 was chosen because it's fast to compute and reasonable for
974+
// determining if a file has changed. The ETag is not used a security
975+
// measure.
976+
sha1Hash string // always set if in the cache
977+
}
978+
979+
type binMetadataCache struct {
980+
binFS http.FileSystem
981+
originalHashes map[string]string
957982

958-
hashes map[string]string
959-
mut sync.RWMutex
960-
sf singleflight.Group
961-
sem chan struct{}
983+
metadata map[string]binMetadata
984+
mut sync.RWMutex
985+
sf singleflight.Group
986+
sem chan struct{}
962987
}
963988

964-
func newBinHashCache(binFS http.FileSystem, binHashes map[string]string) *binHashCache {
965-
b := &binHashCache{
966-
binFS: binFS,
967-
hashes: make(map[string]string, len(binHashes)),
968-
mut: sync.RWMutex{},
969-
sf: singleflight.Group{},
970-
sem: make(chan struct{}, 4),
989+
func newBinMetadataCache(binFS http.FileSystem, binSha1Hashes map[string]string) *binMetadataCache {
990+
b := &binMetadataCache{
991+
binFS: binFS,
992+
originalHashes: make(map[string]string, len(binSha1Hashes)),
993+
994+
metadata: make(map[string]binMetadata, len(binSha1Hashes)),
995+
mut: sync.RWMutex{},
996+
sf: singleflight.Group{},
997+
sem: make(chan struct{}, 4),
971998
}
972-
// Make a copy since we're gonna be mutating it.
973-
for k, v := range binHashes {
974-
b.hashes[k] = v
999+
1000+
// Previously we copied binSha1Hashes to the cache immediately. Since we now
1001+
// read other information like size from the file, we can't do that. Instead
1002+
// we copy the hashes to a different map that will be used to populate the
1003+
// cache on the first request.
1004+
for k, v := range binSha1Hashes {
1005+
b.originalHashes[k] = v
9751006
}
9761007

9771008
return b
9781009
}
9791010

980-
func (b *binHashCache) getHash(name string) (string, error) {
1011+
func (b *binMetadataCache) getMetadata(name string) (binMetadata, error) {
9811012
b.mut.RLock()
982-
hash, ok := b.hashes[name]
1013+
metadata, ok := b.metadata[name]
9831014
b.mut.RUnlock()
9841015
if ok {
985-
return hash, nil
1016+
return metadata, nil
9861017
}
9871018

9881019
// Avoid DOS by using a pool, and only doing work once per file.
989-
v, err, _ := b.sf.Do(name, func() (interface{}, error) {
1020+
v, err, _ := b.sf.Do(name, func() (any, error) {
9901021
b.sem <- struct{}{}
9911022
defer func() { <-b.sem }()
9921023

9931024
f, err := b.binFS.Open(name)
9941025
if err != nil {
995-
return "", err
1026+
return binMetadata{}, err
9961027
}
9971028
defer f.Close()
9981029

999-
h := sha1.New() //#nosec // Not used for cryptography.
1000-
_, err = io.Copy(h, f)
1030+
var metadata binMetadata
1031+
1032+
stat, err := f.Stat()
10011033
if err != nil {
1002-
return "", err
1034+
return binMetadata{}, err
1035+
}
1036+
metadata.sizeBytes = stat.Size()
1037+
1038+
if hash, ok := b.originalHashes[name]; ok {
1039+
metadata.sha1Hash = hash
1040+
} else {
1041+
h := sha1.New() //#nosec // Not used for cryptography.
1042+
_, err := io.Copy(h, f)
1043+
if err != nil {
1044+
return binMetadata{}, err
1045+
}
1046+
metadata.sha1Hash = hex.EncodeToString(h.Sum(nil))
10031047
}
10041048

1005-
hash := hex.EncodeToString(h.Sum(nil))
10061049
b.mut.Lock()
1007-
b.hashes[name] = hash
1050+
b.metadata[name] = metadata
10081051
b.mut.Unlock()
1009-
return hash, nil
1052+
return metadata, nil
10101053
})
10111054
if err != nil {
1012-
return "", err
1055+
return binMetadata{}, err
10131056
}
10141057

10151058
//nolint:forcetypeassert
1016-
return strings.ToLower(v.(string)), nil
1059+
return v.(binMetadata), nil
10171060
}
10181061

10191062
func applicationNameOrDefault(cfg codersdk.AppearanceConfig) string {

site/site_test.go

Lines changed: 69 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"time"
2020

2121
"github.com/go-chi/chi/v5"
22+
"github.com/go-chi/chi/v5/middleware"
2223
"github.com/google/uuid"
2324
"github.com/stretchr/testify/assert"
2425
"github.com/stretchr/testify/require"
@@ -373,11 +374,13 @@ func TestServingBin(t *testing.T) {
373374
delete(sampleBinFSMissingSha256, binCoderSha1)
374375

375376
type req struct {
376-
url string
377-
ifNoneMatch string
378-
wantStatus int
379-
wantBody []byte
380-
wantEtag string
377+
url string
378+
ifNoneMatch string
379+
wantStatus int
380+
wantBody []byte
381+
wantOriginalSize int
382+
wantEtag string
383+
compression bool
381384
}
382385
tests := []struct {
383386
name string
@@ -390,17 +393,27 @@ func TestServingBin(t *testing.T) {
390393
fs: sampleBinFS(),
391394
reqs: []req{
392395
{
393-
url: "/bin/coder-linux-amd64",
394-
wantStatus: http.StatusOK,
395-
wantBody: []byte("compressed"),
396-
wantEtag: fmt.Sprintf("%q", sampleBinSHAs["coder-linux-amd64"]),
396+
url: "/bin/coder-linux-amd64",
397+
wantStatus: http.StatusOK,
398+
wantBody: []byte("compressed"),
399+
wantOriginalSize: 10,
400+
wantEtag: fmt.Sprintf("%q", sampleBinSHAs["coder-linux-amd64"]),
397401
},
398402
// Test ETag support.
399403
{
400-
url: "/bin/coder-linux-amd64",
401-
ifNoneMatch: fmt.Sprintf("%q", sampleBinSHAs["coder-linux-amd64"]),
402-
wantStatus: http.StatusNotModified,
403-
wantEtag: fmt.Sprintf("%q", sampleBinSHAs["coder-linux-amd64"]),
404+
url: "/bin/coder-linux-amd64",
405+
ifNoneMatch: fmt.Sprintf("%q", sampleBinSHAs["coder-linux-amd64"]),
406+
wantStatus: http.StatusNotModified,
407+
wantOriginalSize: 10,
408+
wantEtag: fmt.Sprintf("%q", sampleBinSHAs["coder-linux-amd64"]),
409+
},
410+
// Test compression support with X-Original-Content-Length
411+
// header.
412+
{
413+
url: "/bin/coder-linux-amd64",
414+
wantStatus: http.StatusOK,
415+
wantOriginalSize: 10,
416+
compression: true,
404417
},
405418
{url: "/bin/GITKEEP", wantStatus: http.StatusNotFound},
406419
},
@@ -462,9 +475,24 @@ func TestServingBin(t *testing.T) {
462475
},
463476
reqs: []req{
464477
// We support both hyphens and underscores for compatibility.
465-
{url: "/bin/coder-linux-amd64", wantStatus: http.StatusOK, wantBody: []byte("embed")},
466-
{url: "/bin/coder_linux_amd64", wantStatus: http.StatusOK, wantBody: []byte("embed")},
467-
{url: "/bin/GITKEEP", wantStatus: http.StatusOK, wantBody: []byte("")},
478+
{
479+
url: "/bin/coder-linux-amd64",
480+
wantStatus: http.StatusOK,
481+
wantBody: []byte("embed"),
482+
wantOriginalSize: 5,
483+
},
484+
{
485+
url: "/bin/coder_linux_amd64",
486+
wantStatus: http.StatusOK,
487+
wantBody: []byte("embed"),
488+
wantOriginalSize: 5,
489+
},
490+
{
491+
url: "/bin/GITKEEP",
492+
wantStatus: http.StatusOK,
493+
wantBody: []byte(""),
494+
wantOriginalSize: 0,
495+
},
468496
},
469497
},
470498
}
@@ -482,12 +510,14 @@ func TestServingBin(t *testing.T) {
482510
require.Error(t, err, "extraction or read did not fail")
483511
}
484512

485-
srv := httptest.NewServer(site.New(&site.Options{
513+
site := site.New(&site.Options{
486514
Telemetry: telemetry.NewNoop(),
487515
BinFS: binFS,
488516
BinHashes: binHashes,
489517
SiteFS: rootFS,
490-
}))
518+
})
519+
compressor := middleware.NewCompressor(1, "text/*", "application/*")
520+
srv := httptest.NewServer(compressor.Handler(site))
491521
defer srv.Close()
492522

493523
// Create a context
@@ -502,6 +532,9 @@ func TestServingBin(t *testing.T) {
502532
if tr.ifNoneMatch != "" {
503533
req.Header.Set("If-None-Match", tr.ifNoneMatch)
504534
}
535+
if tr.compression {
536+
req.Header.Set("Accept-Encoding", "gzip")
537+
}
505538

506539
resp, err := http.DefaultClient.Do(req)
507540
require.NoError(t, err, "http do failed")
@@ -520,10 +553,28 @@ func TestServingBin(t *testing.T) {
520553
assert.Empty(t, gotBody, "body is not empty")
521554
}
522555

556+
if tr.compression {
557+
assert.Equal(t, "gzip", resp.Header.Get("Content-Encoding"), "content encoding is not gzip")
558+
} else {
559+
assert.Empty(t, resp.Header.Get("Content-Encoding"), "content encoding is not empty")
560+
}
561+
523562
if tr.wantEtag != "" {
524563
assert.NotEmpty(t, resp.Header.Get("ETag"), "etag header is empty")
525564
assert.Equal(t, tr.wantEtag, resp.Header.Get("ETag"), "etag did not match")
526565
}
566+
567+
if tr.wantOriginalSize > 0 {
568+
// This is a custom header that we set to help the
569+
// client know the size of the decompressed data. See
570+
// the comment in site.go.
571+
headerStr := resp.Header.Get("X-Original-Content-Length")
572+
assert.NotEmpty(t, headerStr, "X-Original-Content-Length header is empty")
573+
originalSize, err := strconv.Atoi(headerStr)
574+
if assert.NoErrorf(t, err, "could not parse X-Original-Content-Length header %q", headerStr) {
575+
assert.EqualValues(t, tr.wantOriginalSize, originalSize, "X-Original-Content-Length did not match")
576+
}
577+
}
527578
})
528579
}
529580
})

0 commit comments

Comments
 (0)