Skip to content

Commit 9e3bcbf

Browse files
authored
Merge pull request github#39843 from github/repo-sync
Repo sync
2 parents 15356b0 + 47360b4 commit 9e3bcbf

File tree

1 file changed

+46
-4
lines changed

1 file changed

+46
-4
lines changed

src/search/lib/helpers/external-search-analytics.ts

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
import { publish } from '@/events/lib/hydro'
22
import { hydroNames } from '@/events/lib/schema'
3+
import { createLogger } from '@/observability/logger'
4+
5+
// Module-scope logger, created by passing this module's URL to createLogger.
const logger = createLogger(import.meta.url)
36

47
/**
58
* Handles search analytics and client_name validation for external requests
@@ -40,20 +43,29 @@ export async function handleExternalSearchAnalytics(
4043
else if (normalizedHost.endsWith('.github.net') || normalizedHost.endsWith('.githubapp.com')) {
4144
return null
4245
}
43-
// For localhost development without client_name, we'll still send analytics below
4446
}
4547

4648
// For localhost, ensure we have a client_name for analytics
4749
if (normalizedHost === 'localhost' && !client_name) {
4850
client_name = 'localhost'
4951
}
5052

53+
// Log when we detect an external request that we will send analytics for
54+
if (client_name && client_name !== 'docs.github.com-client') {
55+
logger.info('External search analytics: Sending analytics for external client', {
56+
client_name,
57+
searchContext,
58+
isLikelyExternalAPI,
59+
normalizedHost,
60+
userAgent: sanitizeUserAgent(req.headers['user-agent']),
61+
})
62+
}
63+
5164
// Send search event with client identifier
5265
try {
53-
await publish({
66+
const analyticsPayload = {
5467
schema: hydroNames.search,
5568
value: {
56-
type: 'search',
5769
version: '1.0.0',
5870
context: {
5971
event_id: crypto.randomUUID(),
@@ -73,7 +85,9 @@ export async function handleExternalSearchAnalytics(
7385
search_context: searchContext,
7486
search_client: client_name as string,
7587
},
76-
})
88+
}
89+
90+
await publish(analyticsPayload)
7791
} catch (error) {
7892
// Don't fail the request if analytics fails
7993
console.error('Failed to send search analytics:', error)
@@ -82,6 +96,34 @@ export async function handleExternalSearchAnalytics(
8296
return null
8397
}
8498

99+
/**
 * Known client signatures and the coarse label reported for each.
 *
 * Declared once at module scope so the regexes are not rebuilt on every call.
 * Entries are checked in order and the first match wins; the generic
 * `Mozilla` token is last, so a user agent that also names a more specific
 * client (e.g. axios) gets that client's label instead of `browser`.
 */
const USER_AGENT_PATTERNS: ReadonlyArray<{ regex: RegExp; name: string }> = [
  { regex: /^curl/i, name: 'curl' },
  { regex: /^wget/i, name: 'wget' },
  { regex: /python-requests/i, name: 'python-requests' },
  { regex: /axios/i, name: 'axios' },
  { regex: /node-fetch/i, name: 'node-fetch' },
  { regex: /Go-http-client/i, name: 'go-http-client' },
  { regex: /okhttp/i, name: 'okhttp' },
  { regex: /Mozilla/i, name: 'browser' },
]

/**
 * Reduces a raw User-Agent header to a coarse client label, dropping version
 * numbers and any other detail from the header.
 *
 * @param userAgent - Raw User-Agent header value, if the request sent one.
 * @returns `'unknown'` when the header is missing or empty, the label of the
 *   first matching entry in `USER_AGENT_PATTERNS`, or `'other'` when nothing
 *   matches.
 */
function sanitizeUserAgent(userAgent: string | undefined): string {
  if (!userAgent) return 'unknown'

  // None of the regexes use the `g` or `y` flag, so `test` keeps no state
  // between calls and the shared table is safe to reuse.
  const match = USER_AGENT_PATTERNS.find(({ regex }) => regex.test(userAgent))
  return match?.name ?? 'other'
}
126+
85127
/**
86128
* Determines if a host should bypass client_name requirement for analytics
87129
* Returns true if the host ends with github.net or githubapp.com

0 commit comments

Comments
 (0)