Skip to content

Commit 97b5b39

Browse files
authored
check the num of active hosts to make sure that we could retry the query (#270)
* check the num of active hosts to make sure that we could retry the query
* refactoring the code
* fix whitespace
* refactor retry logic, no need to calculate host number
* use never end for loop
* loop with break
* loop without break
* loop fix
* fix get host
* fix newhost
* fix sessionId stickiness
* fix sessionId
1 parent 67c4a41 commit 97b5b39

File tree

1 file changed

+29
-52
lines changed

1 file changed

+29
-52
lines changed

proxy.go

Lines changed: 29 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -166,62 +166,19 @@ func (rp *reverseProxy) ServeHTTP(rw http.ResponseWriter, req *http.Request) {
166166
func executeWithRetry(
167167
ctx context.Context,
168168
s *scope,
169-
retryNum int,
169+
maxRetry int,
170170
rp func(http.ResponseWriter, *http.Request),
171171
rw ResponseWriterWithCode,
172172
srw StatResponseWriter,
173173
req *http.Request,
174174
monitorDuration func(float64),
175175
monitorRetryRequestInc func(prometheus.Labels),
176176
) (float64, error) {
177-
// the number of replicas should be > 1:
178-
// when a host is unavailable,
179-
// this ensures there may be another set of replicas that contains all the ClickHouse data
180177
startTime := time.Now()
181178
var since float64
182-
numReplicas := len(s.host.replica.cluster.replicas)
183-
if numReplicas > 1 && retryNum <= numReplicas {
184-
for i := 0; i <= retryNum; i++ {
185-
rp(rw, req)
186179

187-
err := ctx.Err()
188-
if err != nil {
189-
since = time.Since(startTime).Seconds()
190-
191-
return since, err
192-
}
193-
srw.SetStatusCode(rw.StatusCode())
194-
if rw.StatusCode() == http.StatusBadGateway {
195-
log.Debugf("the invalid host is: %s", s.host.addr)
196-
if i == retryNum {
197-
since = time.Since(startTime).Seconds()
198-
monitorDuration(since)
199-
200-
s.host.penalize()
201-
q := getQuerySnippet(req)
202-
err1 := fmt.Errorf("%s: cannot reach %s; query: %q", s, s.host.addr.Host, q)
203-
respondWith(srw, err1, srw.StatusCode())
204-
return since, nil
205-
} else {
206-
// the query execution has been failed
207-
s.host.penalize()
208-
s.host.dec()
209-
atomic.StoreUint32(&s.host.active, uint32(0))
210-
monitorRetryRequestInc(s.labels)
211-
h := s.host
212-
s.host = h.replica.cluster.getHost()
213-
214-
req.URL.Host = s.host.addr.Host
215-
req.URL.Scheme = s.host.addr.Scheme
216-
log.Debugf("the valid host is: %s", s.host.addr)
217-
}
218-
} else {
219-
since = time.Since(startTime).Seconds()
220-
221-
return since, nil
222-
}
223-
}
224-
} else {
180+
numRetry := 0
181+
for {
225182
rp(rw, req)
226183

227184
err := ctx.Err()
@@ -232,19 +189,39 @@ func executeWithRetry(
232189
}
233190
// The request has been successfully proxied.
234191

235-
since = time.Since(startTime).Seconds()
236-
monitorDuration(since)
237-
238192
srw.SetStatusCode(rw.StatusCode())
239193
// StatusBadGateway response is returned by http.ReverseProxy when
240194
// it cannot establish connection to remote host.
241195
if rw.StatusCode() == http.StatusBadGateway {
242196
log.Debugf("the invalid host is: %s", s.host.addr)
243197
s.host.penalize()
244-
q := getQuerySnippet(req)
245-
err1 := fmt.Errorf("%s: cannot reach %s; query: %q", s, s.host.addr.Host, q)
246-
respondWith(srw, err1, srw.StatusCode())
198+
s.host.dec()
199+
atomic.StoreUint32(&s.host.active, uint32(0))
200+
newHost := s.host.replica.cluster.getHost()
201+
// The query could be retried if it has no stickiness to a certain server
202+
if numRetry < maxRetry && newHost.isActive() && s.sessionId == "" {
203+
// the query execution has been failed
204+
monitorRetryRequestInc(s.labels)
205+
206+
// update host
207+
s.host = newHost
208+
209+
req.URL.Host = s.host.addr.Host
210+
req.URL.Scheme = s.host.addr.Scheme
211+
log.Debugf("the valid host is: %s", s.host.addr)
212+
} else {
213+
since = time.Since(startTime).Seconds()
214+
monitorDuration(since)
215+
q := getQuerySnippet(req)
216+
err1 := fmt.Errorf("%s: cannot reach %s; query: %q", s, s.host.addr.Host, q)
217+
respondWith(srw, err1, srw.StatusCode())
218+
break
219+
}
220+
} else {
221+
since = time.Since(startTime).Seconds()
222+
break
247223
}
224+
numRetry++
248225
}
249226
return since, nil
250227
}

0 commit comments

Comments
 (0)