Skip to content

Commit 3acb34a

Browse files
authored
Merge pull request #4 from hatamiarash7/input-array
Support Input Array
2 parents fa40c6d + 1be96c1 commit 3acb34a

21 files changed

+287
-64
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,4 @@ test/python/__pycache__/
88
.Rhistory
99
*.log
1010
*.csv
11-
!test/data/tranco.csv
11+
!test/data/*.csv

README.md

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -239,20 +239,20 @@ You can use this function to get the ranking of a domain:
239239

240240
```sql
241241
D SELECT get_tranco_rank('microsoft.com') as rank;
242-
┌───────┐
243-
│ rank  │
244-
│ int32 │
245-
├───────┤
246-
│     2 │
247-
└───────┘
242+
┌─────────┐
243+
│  rank   │
244+
│ varchar │
245+
├─────────┤
246+
│ 2       │
247+
└─────────┘
248248

249249
D SELECT get_tranco_rank('cloudflare.com') as rank;
250-
┌───────┐
251-
│ rank  │
252-
│ int32 │
253-
├───────┤
254-
│    13 │
255-
└───────┘
250+
┌─────────┐
251+
│  rank   │
252+
│ varchar │
253+
├─────────┤
254+
│ 13      │
255+
└─────────┘
256256
```
257257

258258
### Get Extension Version

src/functions/extract_domain.cpp

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,18 +11,23 @@ namespace duckdb
1111
{
1212
// Extract the input from the arguments
1313
auto &input_vector = args.data[0];
14-
auto input = input_vector.GetValue (0).ToString ();
14+
auto result_data = FlatVector::GetData<string_t> (result);
1515

16-
if (input.empty ())
16+
for (idx_t i = 0; i < args.size (); i++)
1717
{
18-
result.SetValue (0, Value (""));
19-
return;
20-
}
21-
22-
// Extract the domain using the utility function
23-
auto domain = netquack::ExtractDomain (state, input);
18+
auto input = input_vector.GetValue (i).ToString ();
2419

25-
result.SetValue (0, Value (domain));
20+
try
21+
{
22+
// Extract the domain using the utility function
23+
auto domain = netquack::ExtractDomain (state, input);
24+
result_data[i] = StringVector::AddString (result, domain);
25+
}
26+
catch (const std::exception &e)
27+
{
28+
result_data[i] = "Error extracting domain: " + std::string (e.what ());
29+
}
30+
}
2631
}
2732

2833
namespace netquack

src/functions/extract_host.cpp

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,23 @@ namespace duckdb
99
{
1010
// Extract the input from the arguments
1111
auto &input_vector = args.data[0];
12-
auto input = input_vector.GetValue (0).ToString ();
12+
auto result_data = FlatVector::GetData<string_t> (result);
1313

14-
// Extract the host using the utility function
15-
auto host = netquack::ExtractHost (input);
14+
for (idx_t i = 0; i < args.size (); i++)
15+
{
16+
auto input = input_vector.GetValue (i).ToString ();
1617

17-
// Set the result
18-
result.SetValue (0, Value (host));
18+
try
19+
{
20+
// Extract the host using the utility function
21+
auto host = netquack::ExtractHost (input);
22+
result_data[i] = StringVector::AddString (result, host);
23+
}
24+
catch (const std::exception &e)
25+
{
26+
result_data[i] = "Error extracting host: " + std::string (e.what ());
27+
}
28+
}
1929
}
2030

2131
namespace netquack

src/functions/extract_path.cpp

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,23 @@ namespace duckdb
99
{
1010
// Extract the input from the arguments
1111
auto &input_vector = args.data[0];
12-
auto input = input_vector.GetValue (0).ToString ();
12+
auto result_data = FlatVector::GetData<string_t> (result);
1313

14-
// Extract the path using the utility function
15-
auto path = netquack::ExtractPath (input);
14+
for (idx_t i = 0; i < args.size (); i++)
15+
{
16+
auto input = input_vector.GetValue (i).ToString ();
1617

17-
// Set the result
18-
result.SetValue (0, Value (path));
18+
try
19+
{
20+
// Extract the path using the utility function
21+
auto path = netquack::ExtractPath (input);
22+
result_data[i] = StringVector::AddString (result, path);
23+
}
24+
catch (const std::exception &e)
25+
{
26+
result_data[i] = "Error extracting path: " + std::string (e.what ());
27+
}
28+
};
1929
}
2030

2131
namespace netquack
@@ -26,11 +36,12 @@ namespace duckdb
2636
// Explanation:
2737
// ^ - Start of the string
2838
// (?: - Non-capturing group for the protocol and domain part
29-
// (?:(?:ftp|https?|rsync):\/\/)? - Optional ftp://, http://, https://, or rsync://
30-
// (?:[^\/\s]+) - Domain name (any characters except '/' or whitespace)
39+
// (?:(?:ftp|https?|rsync):\/\/)? - Optional protocol (ftp://, http://, https://, or rsync://)
40+
// (?:[^\/\s]+) - Domain name or IP address (any characters except '/' or whitespace)
3141
// )
32-
// (\/[^?#]*) - Capturing group for the path (starts with '/', followed by any characters except '?' or '#')
33-
std::regex path_regex (R"(^(?:(?:(?:ftp|https?|rsync):\/\/)?(?:[^\/\s]+))(\/[^?#]*))");
42+
// (\/[^?#]*)? - Optional capturing group for the path (starts with '/', followed by any characters except '?' or '#')
43+
// - The '?' at the end makes the path component optional, allowing the regex to match URLs with or without a path
44+
std::regex path_regex (R"(^(?:(?:(?:ftp|https?|rsync):\/\/)?(?:[^\/\s]+))(\/[^?#]*)?)");
3445
std::smatch path_match;
3546

3647
// Use regex_search to find the path component in the input string

src/functions/extract_query.cpp

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,25 @@ namespace duckdb
77
// Function to extract the query string from a URL
88
void ExtractQueryStringFunction (DataChunk &args, ExpressionState &state, Vector &result)
99
{
10-
// Extract the URL from the input
11-
auto &url_vector = args.data[0];
12-
auto url = url_vector.GetValue (0).ToString ();
10+
// Extract the input from the arguments
11+
auto &input_vector = args.data[0];
12+
auto result_data = FlatVector::GetData<string_t> (result);
1313

14-
// Extract the query string
15-
auto query_string = netquack::ExtractQueryString (url);
14+
for (idx_t i = 0; i < args.size (); i++)
15+
{
16+
auto input = input_vector.GetValue (i).ToString ();
1617

17-
// Set the result
18-
result.SetValue (0, Value (query_string));
18+
try
19+
{
20+
// Extract the query string using the utility function
21+
auto query_string = netquack::ExtractQueryString (input);
22+
result_data[i] = StringVector::AddString (result, query_string);
23+
}
24+
catch (const std::exception &e)
25+
{
26+
result_data[i] = "Error extracting query string: " + std::string (e.what ());
27+
}
28+
};
1929
}
2030

2131
namespace netquack

src/functions/extract_schema.cpp

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,23 @@ namespace duckdb
99
{
1010
// Extract the input from the arguments
1111
auto &input_vector = args.data[0];
12-
auto input = input_vector.GetValue (0).ToString ();
12+
auto result_data = FlatVector::GetData<string_t> (result);
1313

14-
// Extract the schema using the utility function
15-
auto schema = netquack::ExtractSchema (input);
14+
for (idx_t i = 0; i < args.size (); i++)
15+
{
16+
auto input = input_vector.GetValue (i).ToString ();
1617

17-
// Set the result
18-
result.SetValue (0, Value (schema));
18+
try
19+
{
20+
// Extract the schema using the utility function
21+
auto schema = netquack::ExtractSchema (input);
22+
result_data[i] = StringVector::AddString (result, schema);
23+
}
24+
catch (const std::exception &e)
25+
{
26+
result_data[i] = "Error extracting schema: " + std::string (e.what ());
27+
}
28+
};
1929
}
2030

2131
namespace netquack

src/functions/extract_subdomain.cpp

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,23 @@ namespace duckdb
1111
{
1212
// Extract the input from the arguments
1313
auto &input_vector = args.data[0];
14-
auto input = input_vector.GetValue (0).ToString ();
14+
auto result_data = FlatVector::GetData<string_t> (result);
1515

16-
// Extract the sub-domain using the utility function
17-
auto subdomain = netquack::ExtractSubDomain (state, input);
16+
for (idx_t i = 0; i < args.size (); i++)
17+
{
18+
auto input = input_vector.GetValue (i).ToString ();
1819

19-
result.SetValue (0, Value (subdomain));
20+
try
21+
{
22+
// Extract the subdomain using the utility function
23+
auto subdomain = netquack::ExtractSubDomain (state, input);
24+
result_data[i] = StringVector::AddString (result, subdomain);
25+
}
26+
catch (const std::exception &e)
27+
{
28+
result_data[i] = "Error extracting subdomain: " + std::string (e.what ());
29+
}
30+
}
2031
}
2132

2233
namespace netquack

src/functions/extract_tld.cpp

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,23 @@ namespace duckdb
1111
{
1212
// Extract the input from the arguments
1313
auto &input_vector = args.data[0];
14-
auto input = input_vector.GetValue (0).ToString ();
14+
auto result_data = FlatVector::GetData<string_t> (result);
1515

16-
// Extract the top-level domain using the utility function
17-
auto tld = netquack::ExtractTLD (state, input);
16+
for (idx_t i = 0; i < args.size (); i++)
17+
{
18+
auto input = input_vector.GetValue (i).ToString ();
1819

19-
// Set the result
20-
result.SetValue (0, Value (tld));
20+
try
21+
{
22+
// Extract the top-level domain using the utility function
23+
auto tld = netquack::ExtractTLD (state, input);
24+
result_data[i] = StringVector::AddString (result, tld);
25+
}
26+
catch (const std::exception &e)
27+
{
28+
result_data[i] = "Error extracting tld: " + std::string (e.what ());
29+
}
30+
}
2131
}
2232

2333
namespace netquack

src/functions/get_tranco.cpp

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -148,13 +148,28 @@ namespace duckdb
148148
throw std::runtime_error ("Tranco table not found. Download it first using `SELECT update_tranco(true);`");
149149
}
150150

151-
auto &domain_vector = args.data[0];
152-
auto domain = domain_vector.GetValue (0).ToString ();
151+
// Extract the input from the arguments
152+
auto &input_vector = args.data[0];
153+
auto result_data = FlatVector::GetData<string_t> (result);
153154

154-
auto query = "SELECT rank FROM tranco_list WHERE domain = '" + domain + "'";
155-
auto query_result = con.Query (query);
155+
for (idx_t i = 0; i < args.size (); i++)
156+
{
157+
auto input = input_vector.GetValue (i).ToString ();
158+
159+
try
160+
{
161+
auto query = "SELECT rank FROM tranco_list WHERE domain = '" + input + "'";
162+
163+
auto query_result = con.Query (query);
164+
auto rank = query_result->RowCount () > 0 ? query_result->GetValue (0, 0) : Value ();
156165

157-
result.SetValue (0, query_result->RowCount () > 0 ? query_result->GetValue (0, 0) : Value ());
166+
result_data[i] = StringVector::AddString (result, rank.ToString ());
167+
}
168+
catch (const std::exception &e)
169+
{
170+
result_data[i] = "Error extracting tranco rank: " + std::string (e.what ());
171+
}
172+
}
158173
}
159174
} // namespace netquack
160175
} // namespace duckdb

0 commit comments

Comments
 (0)