Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,5 @@ models/*
third_party/whisper.cpp
.env
dist

/.vscode
36 changes: 36 additions & 0 deletions cmd/crtowebsocket/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Chromium to WebSocket

This is a simple HTTP server that listens for WebSpeech connections from a
Chromium browser, converts audio data to WAV format, streams it to a WebSocket
client, receives text data from the WebSocket client, and sends it back to the
Chromium browser.

## Usage

```bash
go run .
```

## Building

```bash
go build -o crtowebsocket .
```

## Running

```bash
./crtowebsocket
```

## Building for release

```bash
go build -o crtowebsocket .
```

## Running for release

```bash
./crtowebsocket
```
91 changes: 91 additions & 0 deletions cmd/crtowebsocket/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
package main

import (
"net/http"
"os"
"strconv"
"time"

"github.com/brave-experiments/go-stt/cr_api_websocket_proxy"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
"github.com/urfave/cli/v2"
)

// Build-time defaults for the proxy command.
const (
	// version is the value reported as the CLI application version.
	version = "1"

	// defaultListenAddress is the default for the --listen-address flag,
	// i.e. where the local HTTP server accepts connections.
	defaultListenAddress = "127.0.0.1:8090"

	// defaultWebsocketURL is the default for the --websocket-url flag,
	// i.e. the remote WebSocket STT service.
	defaultWebsocketURL = "ws://127.0.0.1:8080/api-speech-wss/"
)

func main() {
zerolog.SetGlobalLevel(zerolog.InfoLevel)
log.Logger = log.Output(
zerolog.ConsoleWriter{
Out: os.Stderr,
NoColor: true,
},
)
zerolog.CallerMarshalFunc = func(pc uintptr, file string, line int) string {
short := file
for i := len(file) - 1; i > 0; i-- {
if file[i] == '/' {
short = file[i+1:]
break
}
}
file = short
return file + ":" + strconv.Itoa(line)
}

zerolog.SetGlobalLevel(zerolog.DebugLevel)
log.Logger = log.With().Caller().Logger()

app := cli.NewApp()
app.Name = "Chromium WebSpeech API Endpoint to WebSocket proxy"
app.Version = version
app.Flags = []cli.Flag{
&cli.StringFlag{
Name: "listen-address",
Value: defaultListenAddress,
},
&cli.StringFlag{
Name: "websocket-url",
Value: defaultWebsocketURL,
},
&cli.DurationFlag{
Name: "timeout",
Value: 60 * time.Second,
},
&cli.BoolFlag{
Name: "try-to-finalize-text",
Value: false,
},
}
app.Action = run

if err := app.Run(os.Args); err != nil {
log.Fatal().Err(err)
}
}

func run(c *cli.Context) error {
// Create a configuration struct
config := &cr_api_websocket_proxy.HandlerConfig{
WebsocketURL: c.String("websocket-url"),
Timeout: c.Duration("timeout"),
TryToFinalizeText: c.Bool("try-to-finalize-text"),
}

// Create a handler instance with the config
handler := cr_api_websocket_proxy.NewHandler(config)

// Register handlers that have access to the config
http.HandleFunc("/up", handler.HandleUpstreamRequest)
http.HandleFunc("/down", handler.HandleDownstreamRequest)

http.ListenAndServe(c.String("listen-address"), nil)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

reported by reviewdog 🐶
[semgrep] The profiling 'pprof' endpoint is automatically exposed on /debug/pprof. This could leak information about the server. Instead, use import "net/http/pprof". See https://www.farsightsecurity.com/blog/txt-record/go-remote-profiling-20161028/ for more information and mitigation.

Source: https://semgrep.dev/r/go.lang.security.audit.net.pprof.pprof-debug-exposure


Cc @thypon @kdenhartog


return nil
}
7 changes: 7 additions & 0 deletions cmd/crtowebsocket/pprof.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
//go:build pprof

package main

import (
_ "net/http/pprof"
)
51 changes: 51 additions & 0 deletions cr_api_websocket_proxy/audio.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package cr_api_websocket_proxy

import (
"fmt"
"net/http"

"azul3d.org/engine/audio"
"github.com/colega/zeropool"

// Add flac decoder for decoding incoming audio
_ "azul3d.org/engine/audio/flac"
)

// Audio framing parameters.
const (
	// expectedSampleRate is the only sample rate (in Hz) that
	// NewAudioDecoder accepts.
	expectedSampleRate = 16000

	// samplesPerChunk is the number of samples in one 20ms chunk at
	// expectedSampleRate.
	samplesPerChunk = (expectedSampleRate / 1000) * 20

	// bytesPerChunk is the size of one chunk in bytes, at two bytes per
	// sample.
	bytesPerChunk = 2 * samplesPerChunk
)

// audioSamplesBufferPool is a pool of sample buffers; every buffer it
// constructs holds exactly samplesPerChunk samples.
var audioSamplesBufferPool = zeropool.New(func() audio.Int16 {
	return make(audio.Int16, samplesPerChunk)
})

// audioBytesBufferPool is a pool of byte buffers; every buffer it constructs
// is exactly bytesPerChunk bytes long.
var audioBytesBufferPool = zeropool.New(func() []byte {
	return make([]byte, bytesPerChunk)
})

// NewAudioDecoder creates an audio decoder that reads from the body of req.
//
// It returns the error from audio.NewDecoder when the body cannot be decoded,
// and an "unexpected sample rate" error when the stream's sample rate differs
// from expectedSampleRate. The returned decoder is nil whenever the error is
// non-nil.
func NewAudioDecoder(req *http.Request) (audio.Decoder, error) {
	decoder, _, err := audio.NewDecoder(req.Body)
	if err != nil {
		return nil, err
	}

	// Only streams at the expected sample rate are accepted.
	if rate := decoder.Config().SampleRate; rate != expectedSampleRate {
		return nil, fmt.Errorf("unexpected sample rate: %d", rate)
	}

	return decoder, nil
}
112 changes: 112 additions & 0 deletions cr_api_websocket_proxy/audio_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
package cr_api_websocket_proxy

import (
"bytes"
"io"
"net/http"
"os"
"testing"

"azul3d.org/engine/audio"
)

// TestAudioBufferPools checks that both chunk buffer pools hand out non-nil
// buffers of the expected length, and returns each buffer to its pool.
func TestAudioBufferPools(t *testing.T) {
	t.Run("samples buffer pool", func(t *testing.T) {
		buf := audioSamplesBufferPool.Get()
		defer audioSamplesBufferPool.Put(buf)

		if buf == nil {
			t.Error("expected non-nil samples buffer")
		}
		if n := len(buf); n != samplesPerChunk {
			t.Errorf("samples buffer length = %v, want %v", n, samplesPerChunk)
		}
	})

	t.Run("bytes buffer pool", func(t *testing.T) {
		buf := audioBytesBufferPool.Get()
		defer audioBytesBufferPool.Put(buf)

		if buf == nil {
			t.Error("expected non-nil bytes buffer")
		}
		if n := len(buf); n != bytesPerChunk {
			t.Errorf("bytes buffer length = %v, want %v", n, bytesPerChunk)
		}
	})
}

// mockBody adapts an in-memory buffer so it can be used as a request body:
// reads come from the embedded buffer and Close does nothing.
type mockBody struct {
	*bytes.Buffer
}

// Close is a no-op that always reports success.
func (mockBody) Close() error { return nil }

// TestFlacDecoder_InvalidData checks that NewAudioDecoder rejects a body that
// is not decodable audio, returning an error and a nil decoder.
func TestFlacDecoder_InvalidData(t *testing.T) {
	t.Run("invalid audio data", func(t *testing.T) {
		body := mockBody{bytes.NewBufferString("invalid audio data")}
		req := &http.Request{Body: body}

		dec, decErr := NewAudioDecoder(req)
		if decErr == nil {
			t.Error("expected error for invalid audio data")
		}
		if dec != nil {
			t.Error("expected nil decoder for invalid audio data")
		}
	})
}

func TestFlacDecoder_ValidData(t *testing.T) {
req := &http.Request{
Body: mockBody{bytes.NewBuffer(readTestFile(t, "testdata/16khz.flac"))},
}

decoder, err := NewAudioDecoder(req)
if err != nil {
t.Fatalf("failed to create decoder: %v", err)
}

// Verify decoder config
config := decoder.Config()
if config.SampleRate != expectedSampleRate {
t.Errorf("sample rate = %v, want %v", config.SampleRate, expectedSampleRate)
}

// Try reading some samples
samples := make(audio.Int16, 1024)
n, err := decoder.Read(samples)
if err != nil && err != io.EOF {
t.Errorf("failed to read samples: %v", err)
}
if n == 0 {
t.Error("expected to read some samples")
}
}

func TestFlacDecoder_InvalidSampleRate(t *testing.T) {
req := &http.Request{
Body: mockBody{bytes.NewBuffer(readTestFile(t, "testdata/8khz.flac"))},
}

decoder, err := NewAudioDecoder(req)
if decoder != nil {
t.Error("expected nil decoder for invalid sample rate")
}
if err == nil {
t.Error("expected error for invalid sample rate")
}
}

// readTestFile returns the contents of the file at path. If the file cannot
// be read, the calling test is failed immediately.
func readTestFile(t *testing.T, path string) []byte {
	t.Helper()

	contents, readErr := os.ReadFile(path)
	if readErr == nil {
		return contents
	}

	t.Fatalf("failed to read test file %s: %v", path, readErr)
	return nil
}
Loading