20
20
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
21
21
"client_x509_cert_url": "{CERT_URL}"
22
22
23
+ In Airflow 1.9.0 this requires to use the web interface or cli to set connection extra's. If you prefer to not use the
24
+ web interface to manage connections you can also supply the key as a json file.
25
+
26
+ @TODO: add support for p12 keys
27
+
23
28
More details can be found here:
24
29
https://developers.google.com/api-client-library/python/guide/aaa_client_secrets
25
30
"""
26
31
27
- from airflow .hooks .base_hook import BaseHook
32
+ import time
33
+ import os
28
34
35
+ from airflow .hooks .base_hook import BaseHook
36
+ from airflow import configuration as conf
29
37
from apiclient .discovery import build
38
+ from apiclient .http import MediaInMemoryUpload
30
39
from oauth2client .service_account import ServiceAccountCredentials
31
40
from oauth2client .client import AccessTokenCredentials
32
- import time
41
+ from collections import namedtuple
33
42
34
43
35
44
class GoogleAnalyticsHook (BaseHook ):
36
- def __init__ (self , google_analytics_conn_id = 'google_analytics_default' ):
45
+ GAService = namedtuple ('GAService' , ['name' , 'version' , 'scopes' ])
46
+ # We need to rely on 2 services depending on the task at hand: reading from or writing to GA.
47
+ _services = {
48
+ 'reporting' : GAService (name = 'analyticsreporting' ,
49
+ version = 'v4' ,
50
+ scopes = ['https://www.googleapis.com/auth/analytics.readonly' ]),
51
+ 'management' : GAService (name = 'analytics' ,
52
+ version = 'v3' ,
53
+ scopes = ['https://www.googleapis.com/auth/analytics' ])
54
+ }
55
+ _key_folder = os .path .join (conf .get ('core' , 'airflow_home' ), 'keys' )
56
+
57
+ def __init__ (self , google_analytics_conn_id = 'google_analytics_default' , key_file = None ):
37
58
self .google_analytics_conn_id = google_analytics_conn_id
38
59
self .connection = self .get_connection (google_analytics_conn_id )
60
+ if 'client_secrets' in self .connection .extra_dejson :
61
+ self .client_secrets = self .connection .extra_dejson ['client_secrets' ]
62
+ if key_file :
63
+ self .file_location = os .path .join (GoogleAnalyticsHook ._key_folder , key_file )
39
64
40
- self .client_secrets = self .connection .extra_dejson ['client_secrets' ]
65
+ def get_service_object (self , name ):
66
+ service = GoogleAnalyticsHook ._services [name ]
41
67
42
- def get_service_object (self ,
43
- api_name ,
44
- api_version ,
45
- scopes ):
46
68
if self .connection .password :
47
69
credentials = AccessTokenCredentials (self .connection .password ,
48
- 'Astronomer /1.0' )
49
- elif self . client_secrets :
70
+ 'Airflow /1.0' )
71
+ elif hasattr ( self , ' client_secrets' ) :
50
72
credentials = ServiceAccountCredentials .from_json_keyfile_dict (self .client_secrets ,
51
- scopes )
73
+ service .scopes )
74
+
75
+ elif hasattr (self , 'file_location' ):
76
+ credentials = ServiceAccountCredentials .from_json_keyfile_name (self .file_location ,
77
+ service .scopes )
78
+ else :
79
+ raise ValueError ('No valid credentials could be found' )
52
80
53
- return build (api_name , api_version , credentials = credentials )
81
+ return build (service .name , service .version , credentials = credentials )
82
+
83
+ def get_management_report (self ,
84
+ view_id ,
85
+ since ,
86
+ until ,
87
+ metrics ,
88
+ dimensions ):
89
+
90
+ analytics = self .get_service_object (name = 'management' )
91
+
92
+ return analytics .data ().ga ().get (
93
+ ids = view_id ,
94
+ start_date = since ,
95
+ end_date = until ,
96
+ metrics = metrics ,
97
+ dimensions = dimensions ).execute ()
54
98
55
99
def get_analytics_report (self ,
56
100
view_id ,
@@ -61,9 +105,8 @@ def get_analytics_report(self,
61
105
metrics ,
62
106
page_size ,
63
107
include_empty_rows ):
64
- analytics = self .get_service_object ('analyticsreporting' ,
65
- 'v4' ,
66
- ['https://www.googleapis.com/auth/analytics.readonly' ])
108
+
109
+ analytics = self .get_service_object (name = 'reporting' )
67
110
68
111
reportRequest = {
69
112
'viewId' : view_id ,
@@ -88,9 +131,9 @@ def get_analytics_report(self,
88
131
time .sleep (1 )
89
132
reportRequest .update ({'pageToken' : report ['nextPageToken' ]})
90
133
response = (analytics
91
- .reports ()
92
- .batchGet (body = {'reportRequests' : [reportRequest ]})
93
- .execute ())
134
+ .reports ()
135
+ .batchGet (body = {'reportRequests' : [reportRequest ]})
136
+ .execute ())
94
137
report = response ['reports' ][0 ]
95
138
rows .extend (report .get ('data' , {}).get ('rows' , []))
96
139
@@ -100,3 +143,16 @@ def get_analytics_report(self,
100
143
return report
101
144
else :
102
145
return {}
146
+
147
+ def upload_string (self , account_id , profile_id , string , data_source_id ):
148
+ """
149
+ Upload to custom data sources - example function
150
+ https://developers.google.com/analytics/devguides/config/mgmt/v3/mgmtReference/management/uploads/uploadData
151
+ """
152
+ analytics = self .get_service_object (name = 'management' )
153
+ media = MediaInMemoryUpload (string , mimetype = 'application/octet-stream' , resumable = False )
154
+ analytics .management ().uploads ().uploadData (
155
+ accountId = account_id ,
156
+ webPropertyId = profile_id ,
157
+ customDataSourceId = data_source_id ,
158
+ media_body = media ).execute ()
0 commit comments