1
1
import { publish } from '@/events/lib/hydro'
2
2
import { hydroNames } from '@/events/lib/schema'
3
+ import { createLogger } from '@/observability/logger'
4
+
5
+ const logger = createLogger ( import . meta. url )
3
6
4
7
/**
5
8
* Handles search analytics and client_name validation for external requests
@@ -40,20 +43,29 @@ export async function handleExternalSearchAnalytics(
40
43
else if ( normalizedHost . endsWith ( '.github.net' ) || normalizedHost . endsWith ( '.githubapp.com' ) ) {
41
44
return null
42
45
}
43
- // For localhost development without client_name, we'll still send analytics below
44
46
}
45
47
46
48
// For localhost, ensure we have a client_name for analytics
47
49
if ( normalizedHost === 'localhost' && ! client_name ) {
48
50
client_name = 'localhost'
49
51
}
50
52
53
+ // Log when we detect an external request that we will send analytics for
54
+ if ( client_name && client_name !== 'docs.github.com-client' ) {
55
+ logger . info ( 'External search analytics: Sending analytics for external client' , {
56
+ client_name,
57
+ searchContext,
58
+ isLikelyExternalAPI,
59
+ normalizedHost,
60
+ userAgent : sanitizeUserAgent ( req . headers [ 'user-agent' ] ) ,
61
+ } )
62
+ }
63
+
51
64
// Send search event with client identifier
52
65
try {
53
- await publish ( {
66
+ const analyticsPayload = {
54
67
schema : hydroNames . search ,
55
68
value : {
56
- type : 'search' ,
57
69
version : '1.0.0' ,
58
70
context : {
59
71
event_id : crypto . randomUUID ( ) ,
@@ -73,7 +85,9 @@ export async function handleExternalSearchAnalytics(
73
85
search_context : searchContext ,
74
86
search_client : client_name as string ,
75
87
} ,
76
- } )
88
+ }
89
+
90
+ await publish ( analyticsPayload )
77
91
} catch ( error ) {
78
92
// Don't fail the request if analytics fails
79
93
console . error ( 'Failed to send search analytics:' , error )
@@ -82,6 +96,34 @@ export async function handleExternalSearchAnalytics(
82
96
return null
83
97
}
84
98
99
/**
 * Ordered lookup table mapping a user-agent pattern to the coarse client label
 * reported for it. Order matters: the first pattern that matches wins, so the
 * broad `Mozilla` entry is kept last.
 *
 * None of the expressions use the `g` or `y` flag, so `RegExp.prototype.test`
 * carries no `lastIndex` state between calls and the table can be shared.
 */
const USER_AGENT_CLIENT_PATTERNS: ReadonlyArray<{ regex: RegExp; name: string }> = [
  // CLI tools are only recognised when the header starts with their name.
  { regex: /^curl/i, name: 'curl' },
  { regex: /^wget/i, name: 'wget' },
  // Library identifiers are matched anywhere in the header.
  { regex: /python-requests/i, name: 'python-requests' },
  { regex: /axios/i, name: 'axios' },
  { regex: /node-fetch/i, name: 'node-fetch' },
  { regex: /Go-http-client/i, name: 'go-http-client' },
  { regex: /okhttp/i, name: 'okhttp' },
  { regex: /Mozilla/i, name: 'browser' },
]

/**
 * Sanitizes a user agent by reducing it to a coarse client type, dropping
 * version numbers and any other detail carried by the raw header value.
 *
 * @param userAgent - Raw `User-Agent` header value, if one was sent.
 * @returns `'unknown'` when the header is missing or empty, the label of the
 *   first matching known client (`'curl'`, `'wget'`, `'python-requests'`,
 *   `'axios'`, `'node-fetch'`, `'go-http-client'`, `'okhttp'`, `'browser'`),
 *   or `'other'` when nothing matches. Matching is case-insensitive.
 */
function sanitizeUserAgent(userAgent: string | undefined): string {
  if (!userAgent) return 'unknown'

  const match = USER_AGENT_CLIENT_PATTERNS.find(({ regex }) => regex.test(userAgent))
  return match ? match.name : 'other'
}
126
+
85
127
/**
86
128
* Determines if a host should bypass client_name requirement for analytics
87
129
* Returns true if the host ends with github.net or githubapp.com
0 commit comments