Skip to content

Commit 7d22144

Browse files
authored
[skip changelog] Update workflow and script to fetch Arduino CDN download data (#1476)
1 parent be520ef commit 7d22144

File tree

3 files changed

+139
-126
lines changed

3 files changed

+139
-126
lines changed

.github/tools/fetch_athena_stats.py

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
import boto3
2+
import semver
3+
import os
4+
import logging
5+
import uuid
6+
import time
7+
8+
9+
# logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
10+
# Root logger used throughout this script.
log = logging.getLogger()
# Only CRITICAL records from the AWS SDK and HTTP client loggers get through.
for _noisy_logger in ("boto3", "botocore", "urllib3"):
    logging.getLogger(_noisy_logger).setLevel(logging.CRITICAL)
14+
15+
16+
def execute(client, statement, dest_s3_output_location):
    """Start an Athena query, wait for it to finish, and return its id.

    Args:
        client: Object exposing ``start_query_execution`` and
            ``get_query_execution`` (the boto3 Athena client in this script).
        statement: SQL text submitted as ``QueryString``.
        dest_s3_output_location: Value passed as the query's ``OutputLocation``.

    Returns:
        The ``QueryExecutionId`` returned by ``start_query_execution``.
    """
    log.info(f"execute query: {statement} dumping in {dest_s3_output_location}")
    request = {
        "QueryString": statement,
        # A fresh random token is generated for every submission.
        "ClientRequestToken": str(uuid.uuid4()),
        "QueryExecutionContext": {"Database": "etl_kpi_prod_hwfw"},
        "ResultConfiguration": {"OutputLocation": dest_s3_output_location},
    }
    execution_id = client.start_query_execution(**request)["QueryExecutionId"]
    log.info(f"wait for query {execution_id} completion")
    wait_for_query_execution_completion(client, execution_id)
    log.info("operation successful")
    return execution_id
31+
32+
33+
def wait_for_query_execution_completion(client, query_execution_id, poll_interval=1):
    """Poll a query execution until it reaches a terminal state.

    Args:
        client: Object exposing ``get_query_execution(QueryExecutionId=...)``.
        query_execution_id: Identifier of the execution to watch.
        poll_interval: Seconds to sleep between two polls (defaults to 1).

    Returns:
        None, once the reported state is ``SUCCEEDED``.

    Raises:
        RuntimeError: If the reported state is ``FAILED`` or ``CANCELLED``.
    """
    while True:
        query_execution = client.get_query_execution(QueryExecutionId=query_execution_id)
        status = query_execution["QueryExecution"]["Status"]
        state = status["State"]
        if state == "SUCCEEDED":
            return
        if state in ("FAILED", "CANCELLED"):
            # The reason may be absent from the status; do not let a missing
            # key hide the real failure behind a KeyError.
            reason = status.get("StateChangeReason", "no reason reported")
            # RuntimeError (not BaseException) so ordinary `except Exception`
            # handlers can deal with a failed query.
            raise RuntimeError("query failed or canceled: {}".format(reason))
        # Any other state: wait and poll again.
        time.sleep(poll_interval)
46+
47+
48+
def valid(key):
    """Return True when the text before the first "_" in *key* is a semantic version.

    Args:
        key: String such as ``0.18.0_macOS_64bit.tar.gz``.

    Returns:
        True if the leading segment parses as a semantic version, else False.
    """
    # partition always yields a first segment, so no length check is needed.
    version = key.partition("_")[0]
    try:
        # VersionInfo.parse replaces the deprecated module-level semver.parse.
        semver.VersionInfo.parse(version)
    except ValueError:
        return False
    return True
57+
58+
59+
def get_results(client, execution_id):
    """Collect the rows of a finished query into a ``{flavor: count}`` dict.

    Only rows whose first column passes ``valid`` are kept; the second column
    is stored as returned (a string).

    Args:
        client: Object exposing ``get_paginator("get_query_results")``.
        execution_id: Identifier of the query execution to read.

    Returns:
        Dict mapping the first column's value to the second column's value.
    """
    results_paginator = client.get_paginator("get_query_results")
    results_iter = results_paginator.paginate(QueryExecutionId=execution_id, PaginationConfig={"PageSize": 1000})
    res = {}
    for page_number, results_page in enumerate(results_iter):
        rows = results_page["ResultSet"]["Rows"]
        if page_number == 0:
            # The column-header row is expected only at the top of the first
            # page; skipping a row on later pages would drop real data.
            rows = rows[1:]
        for row in rows:
            cells = row["Data"]
            key = cells[0].get("VarCharValue")
            if key is None:
                # A cell without "VarCharValue" (presumably a NULL) cannot
                # name a flavor, so the row is skipped.
                continue
            if valid(key):
                res[key] = cells[1]["VarCharValue"]

    return res
71+
72+
73+
def convert_data(data):
    """Turn ``{flavor: count}`` pairs into a list of gauge metric dicts.

    A flavor looks like ``0.18.0_macOS_64bit.tar.gz``: version, operating
    system and architecture (plus archive extension) joined by underscores.
    Entries that do not have exactly three underscore-separated parts, or
    whose architecture is longer than 10 characters, are skipped.

    Args:
        data: Mapping of flavor string to download count.

    Returns:
        List of metric dicts; ``GITHUB_REPOSITORY`` is read from the
        environment for the ``host`` field and the ``project`` tag.
    """
    metrics = []
    for flavor, count in data.items():
        parts = flavor.split("_")
        if len(parts) != 3:
            continue
        version, os_version, archive = parts
        # Keep only what precedes the first dot, e.g. "64bit" from "64bit.tar.gz".
        arch = archive.partition(".")[0]
        if len(arch) > 10:
            # Too long to be a real architecture name; a crude filter.
            continue
        repository = os.environ["GITHUB_REPOSITORY"]
        project = repository.split("/")[1]
        tags = [
            f"version:{version}",
            f"os:{os_version}",
            f"arch:{arch}",
            "cdn:downloads.arduino.cc",
            f"project:{project}",
        ]
        metrics.append(
            dict(
                type="gauge",
                name="arduino.downloads.total",
                value=count,
                host=repository,
                tags=tags,
            )
        )

    return metrics
107+
108+
109+
if __name__ == "__main__":
    # Both settings are mandatory; a missing variable aborts with KeyError.
    DEST_S3_OUTPUT = os.environ["AWS_ATHENA_OUTPUT_LOCATION"]
    AWS_ATHENA_SOURCE_TABLE = os.environ["AWS_ATHENA_SOURCE_TABLE"]

    session = boto3.session.Session(region_name="us-east-1")
    athena_client = session.client("athena")

    # Count downloads per release archive ("flavor").
    # NOTE(review): the table name is interpolated directly into the SQL text,
    # so AWS_ATHENA_SOURCE_TABLE must come from a trusted source.
    query = f"""SELECT replace(json_extract_scalar(url_decode(url_decode(querystring)),
'$.data.url'), 'https://downloads.arduino.cc/arduino-cli/arduino-cli_', '')
AS flavor, count(json_extract(url_decode(url_decode(querystring)),'$')) AS gauge
FROM {AWS_ATHENA_SOURCE_TABLE}
WHERE json_extract_scalar(url_decode(url_decode(querystring)),'$.data.url')
LIKE 'https://downloads.arduino.cc/arduino-cli/arduino-cli_%'
AND json_extract_scalar(url_decode(url_decode(querystring)),'$.data.url')
NOT LIKE '%latest%' -- exclude latest redirect
AND json_extract_scalar(url_decode(url_decode(querystring)),'$.data.url')
NOT LIKE '%alpha%' -- exclude early alpha releases
AND json_extract_scalar(url_decode(url_decode(querystring)),'$.data.url')
NOT LIKE '%.tar.bz2%' -- exclude very old releases archive formats
group by 1 ;"""
    exec_id = execute(athena_client, query, DEST_S3_OUTPUT)
    results = get_results(athena_client, exec_id)
    result_json = convert_data(results)

    # Publish the metrics as the step output named "result". The
    # `::set-output` workflow command is deprecated, so prefer the
    # GITHUB_OUTPUT file when the runner provides it and keep the old command
    # only as a fallback.
    github_output = os.environ.get("GITHUB_OUTPUT")
    if github_output:
        with open(github_output, "a", encoding="utf-8") as output_file:
            output_file.write(f"result={result_json}\n")
    else:
        print(f"::set-output name=result::{result_json}")

.github/tools/fetch_athena_stats.sh

Lines changed: 0 additions & 121 deletions
This file was deleted.

.github/workflows/arduino-stats.yaml

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@ jobs:
1818
- name: Checkout
1919
uses: actions/checkout@v2
2020

21+
- uses: actions/setup-python@v2
22+
with:
23+
python-version: "3.x"
24+
2125
- name: Fetch downloads count from Arduino CDN using AWS Athena
2226
id: fetch
2327
env:
@@ -27,11 +31,8 @@ jobs:
2731
AWS_ATHENA_OUTPUT_LOCATION: ${{ secrets.STATS_AWS_ATHENA_OUTPUT_LOCATION }}
2832
GITHUB_REPOSITORY: ${{ github.repository }}
2933
run: |
30-
# Fetch jq 1.6 as VM has only 1.5 ATM
31-
wget -q https://github.com/stedolan/jq/releases/download/jq-1.6/jq-linux64 -O jq
32-
chmod +x jq
33-
PATH="${{ github.workspace }}:$PATH"
34-
.github/tools/fetch_athena_stats.sh
34+
pip install boto3 semver
35+
python .github/tools/fetch_athena_stats.py
3536
3637
- name: Send metrics
3738
uses: masci/datadog@v1

0 commit comments

Comments
 (0)