Skip to content

Commit 6cfac12

Browse files
authored
fix: fix the bytes encode/decode for redis cache (#153)
* fix: fix the bytes encode/decode for redis cache Using `string(data)` to convert the byte array to string introduces error in json marshal/unmarshal, hence causes error when returning cached response from redis. The reason is `Unmarshal` function in `encode/json` would replace invalid UTF-8 or invalid UTF-16 pairs with `U+FFFD`, therefore the `payload` string in `redisCachePayload` will actually change after json marshal/unmarshal since the byte array may contain invalid UTF-8/UTF-16 byte, the length of payload will thereby change, resulting the http server to find the declared length in header `Content-Length` mismatches the actual length of payload. The fix is to base64-encode/decode the byte array to string, thereby eliminates invalid UTF-8/UTF-16 bytes. * fix: add test case about encode/decode the cached value add test cases for base64 encode/decode the cached value * fix: adjust the waiting time of `queue_overflow_for_user` case to pass ci minimize the waiting time between two consecutive requests
1 parent 5b23001 commit 6cfac12

File tree

4 files changed

+72
-6
lines changed

4 files changed

+72
-6
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@
1010
/docs/.output
1111
/docs/.nuxt
1212
/docs/static/sw.js
13+
.idea

cache/redis_cache.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package cache
33
import (
44
"bytes"
55
"context"
6+
"encoding/base64"
67
"encoding/json"
78
"github.com/contentsquare/chproxy/config"
89
"github.com/contentsquare/chproxy/log"
@@ -118,13 +119,18 @@ func (r *redisCache) Get(key *Key) (*CachedData, error) {
118119
log.Errorf("Not able to fetch TTL for: %s ", key)
119120
}
120121

122+
decoded, err := base64.StdEncoding.DecodeString(payload.Payload)
123+
if err != nil {
124+
log.Errorf("failed to decode payload: %s , due to: %v ", payload.Payload, err)
125+
return nil, ErrMissing
126+
}
121127
value := &CachedData{
122128
ContentMetadata: ContentMetadata{
123129
Length: payload.Length,
124130
Type: payload.Type,
125131
Encoding: payload.Encoding,
126132
},
127-
Data: bytes.NewReader([]byte(payload.Payload)),
133+
Data: bytes.NewReader(decoded),
128134
Ttl: ttl,
129135
}
130136

@@ -137,8 +143,9 @@ func (r *redisCache) Put(reader io.Reader, contentMetadata ContentMetadata, key
137143
return 0, err
138144
}
139145

146+
encoded := base64.StdEncoding.EncodeToString(data)
140147
payload := &redisCachePayload{
141-
Length: contentMetadata.Length, Type: contentMetadata.Type, Encoding: contentMetadata.Encoding, Payload: string(data),
148+
Length: contentMetadata.Length, Type: contentMetadata.Type, Encoding: contentMetadata.Encoding, Payload: encoded,
142149
}
143150

144151
marshalled, err := json.Marshal(payload)

main_test.go

Lines changed: 60 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ import (
55
"compress/gzip"
66
"context"
77
"crypto/tls"
8+
"encoding/base64"
9+
"encoding/json"
810
"fmt"
911
"github.com/contentsquare/chproxy/cache"
1012
"io"
@@ -19,9 +21,9 @@ import (
1921
"testing"
2022
"time"
2123

24+
"github.com/alicebob/miniredis/v2"
2225
"github.com/contentsquare/chproxy/config"
2326
"github.com/contentsquare/chproxy/log"
24-
"github.com/alicebob/miniredis/v2"
2527
)
2628

2729
var testDir = "./temp-test-data"
@@ -365,7 +367,7 @@ func TestServe(t *testing.T) {
365367
str, err := redisClient.Get(key.String())
366368
checkErr(t, err)
367369

368-
if !strings.Contains(str, "Ok") || !strings.Contains(str, "text/plain") || !strings.Contains(str, "charset=utf-8") {
370+
if !strings.Contains(str, base64.StdEncoding.EncodeToString([]byte("Ok."))) || !strings.Contains(str, "text/plain") || !strings.Contains(str, "charset=utf-8") {
369371
t.Fatalf("result from cache query is wrong: %s", str)
370372
}
371373

@@ -376,6 +378,57 @@ func TestServe(t *testing.T) {
376378
},
377379
startHTTP,
378380
},
381+
{
382+
"http requests with caching in redis (testcase for base64 encoding/decoding)",
383+
"testdata/http.cache.redis.yml",
384+
func(t *testing.T) {
385+
redisClient.FlushAll()
386+
q := "SELECT 1 FORMAT TabSeparatedWithNamesAndTypes"
387+
req, err := http.NewRequest("GET", "http://127.0.0.1:9090?query="+url.QueryEscape(q), nil)
388+
checkErr(t, err)
389+
390+
resp := httpRequest(t, req, http.StatusOK)
391+
checkHttpResponse(t, resp, string(bytesWithInvalidUTFPairs))
392+
resp2 := httpRequest(t, req, http.StatusOK)
393+
// if we do not use base64 to encode/decode the cached payload, EOF error will be thrown here.
394+
checkHttpResponse(t, resp2, string(bytesWithInvalidUTFPairs))
395+
keys := redisClient.Keys()
396+
if len(keys) != 1 {
397+
t.Fatalf("unexpected amount of keys in redis: %v", len(keys))
398+
}
399+
400+
// check cached response
401+
key := &cache.Key{
402+
Query: []byte(q),
403+
AcceptEncoding: "gzip",
404+
Version: cache.Version,
405+
}
406+
str, err := redisClient.Get(key.String())
407+
checkErr(t, err)
408+
409+
type redisCachePayload struct {
410+
Length int64 `json:"l"`
411+
Type string `json:"t"`
412+
Encoding string `json:"enc"`
413+
Payload string `json:"payload"`
414+
}
415+
416+
var unMarshaledPayload redisCachePayload
417+
err = json.Unmarshal([]byte(str), &unMarshaledPayload)
418+
checkErr(t, err)
419+
if unMarshaledPayload.Payload != base64.StdEncoding.EncodeToString(bytesWithInvalidUTFPairs) {
420+
t.Fatalf("result from cache query is wrong: %s", str)
421+
}
422+
decoded, err := base64.StdEncoding.DecodeString(unMarshaledPayload.Payload)
423+
checkErr(t, err)
424+
425+
if unMarshaledPayload.Length != int64(len(decoded)) {
426+
t.Fatalf("the declared length %d and actual length %d is not same", unMarshaledPayload.Length, len(decoded))
427+
}
428+
},
429+
startHTTP,
430+
},
431+
379432
{
380433
"http gzipped POST request",
381434
"testdata/http.cache.yml",
@@ -706,6 +759,9 @@ func fakeCHHandler(w http.ResponseWriter, r *http.Request) {
706759
fakeCHState.sleep()
707760

708761
fmt.Fprint(w, "bar")
762+
case "SELECT 1 FORMAT TabSeparatedWithNamesAndTypes":
763+
w.WriteHeader(http.StatusOK)
764+
w.Write(bytesWithInvalidUTFPairs)
709765
default:
710766
if strings.Contains(string(query), killQueryPattern) {
711767
fakeCHState.kill()
@@ -715,6 +771,8 @@ func fakeCHHandler(w http.ResponseWriter, r *http.Request) {
715771
}
716772
}
717773

774+
var bytesWithInvalidUTFPairs = []byte{239, 191, 189, 1, 32, 50, 239, 191}
775+
718776
var fakeCHState = &stateCH{
719777
syncCH: make(chan struct{}),
720778
}

proxy_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -251,9 +251,9 @@ func TestReverseProxy_ServeHTTP1(t *testing.T) {
251251
p.users["default"].maxConcurrentQueries = 1
252252
p.users["default"].queueCh = make(chan struct{}, 1)
253253
go makeHeavyRequest(p, time.Millisecond*20)
254-
time.Sleep(time.Millisecond * 5)
254+
time.Sleep(time.Millisecond * 1) // in case ci runner is slow
255255
go makeHeavyRequest(p, time.Millisecond*20)
256-
time.Sleep(time.Millisecond * 5)
256+
time.Sleep(time.Millisecond * 1)
257257
return makeHeavyRequest(p, time.Millisecond*20)
258258
},
259259
},

0 commit comments

Comments
 (0)