This repository was archived by the owner on Feb 17, 2023. It is now read-only.
File tree Expand file tree Collapse file tree 2 files changed +24
-3
lines changed Expand file tree Collapse file tree 2 files changed +24
-3
lines changed Original file line number Diff line number Diff line change @@ -62,6 +62,26 @@ var uastrings = []struct {
62
62
ua : "Facebot" ,
63
63
expected : "Browser:Facebot Bot:true Mobile:false" ,
64
64
},
65
+ {
66
+ title : "NutchCVS" ,
67
+ ua :
"NutchCVS/0.8-dev (Nutch; http://lucene.apache.org/nutch/bot.html; nutch-agent@lucene.apache.org)" ,
68
+ expected : "Browser:NutchCVS Bot:true Mobile:false" ,
69
+ },
70
+ {
71
+ title : "MJ12bot" ,
72
+ ua : "Mozilla/5.0 (compatible; MJ12bot/v1.2.4; http://www.majestic12.co.uk/bot.php?+)" ,
73
+ expected : "Mozilla:5.0 Browser:MJ12bot-v1.2.4 Bot:true Mobile:false" ,
74
+ },
75
+ {
76
+ title : "MJ12bot" ,
77
+ ua : "MJ12bot/v1.0.8 (http://majestic12.co.uk/bot.php?+)" ,
78
+ expected : "Browser:MJ12bot Bot:true Mobile:false" ,
79
+ },
80
+ {
81
+ title : "AhrefsBot" ,
82
+ ua : "Mozilla/5.0 (compatible; AhrefsBot/4.0; +http://ahrefs.com/robot/)" ,
83
+ expected : "Mozilla:5.0 Browser:AhrefsBot-4.0 Bot:true Mobile:false" ,
84
+ },
65
85
66
86
// Internet Explorer
67
87
{
Original file line number Diff line number Diff line change @@ -75,6 +75,8 @@ func (p *UserAgent) fixOther(sections []section) {
75
75
}
76
76
}
77
77
78
+ var botRegex = regexp .MustCompile ("(?i)(bot|crawler|sp(i|y)der|search|worm|fetch|nutch)" )
79
+
78
80
// Check if we're dealing with a bot or with some weird browser. If that is the
79
81
// case, the receiver will be modified accordingly.
80
82
func (p * UserAgent ) checkBot (sections []section ) {
@@ -83,9 +85,8 @@ func (p *UserAgent) checkBot(sections []section) {
83
85
if len (sections ) == 1 && sections [0 ].name != "Mozilla" {
84
86
p .mozilla = ""
85
87
86
- // Check whether the name has some suspicious "bot" in his name.
87
- reg , _ := regexp .Compile ("(?i)bot" )
88
- if reg .Match ([]byte (sections [0 ].name )) {
88
+ // Check whether the name contains a suspicious keyword such as "bot" or "crawler".
89
+ if botRegex .Match ([]byte (sections [0 ].name )) {
89
90
p .setSimple (sections [0 ].name , "" , true )
90
91
return
91
92
}
You can’t perform that action at this time.
0 commit comments