Skip to content

Commit a936524

Browse files
committed
load script (can restore from dump.sh backup)
1 parent 08b8688 commit a936524

File tree

1 file changed

+118
-0
lines changed

1 file changed

+118
-0
lines changed

load.py

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
#!/usr/bin/env python3
2+
3+
import sys
4+
import re
5+
import time
6+
import gzip
7+
from urllib.parse import urljoin
8+
import requests
9+
import json
10+
11+
re.compile
12+
13+
isCommit = re.compile("^[0-9a-fA-F]{40}$")
14+
15+
HEADERS = {
16+
'User-Agent': 'cwlviewer-load/0.0.1',
17+
'Accept': 'application/json'
18+
}
19+
20+
21+
def parse_gitinfos(sourceFp):
22+
doc = json.load(sourceFp)
23+
for wf in doc["content"]:
24+
yield wf["retrievedFrom"]
25+
26+
def is_not_commit(gitinfo):
27+
return not isCommit.match(gitinfo["branch"])
28+
29+
def make_requests(gitinfos):
30+
for git in gitinfos:
31+
# dict_keys(['repoUrl', 'branch', 'path', 'packedId', 'url', 'rawUrl', 'type'])
32+
req = {
33+
"url": git["repoUrl"],
34+
"branch": git["branch"],
35+
"path": git["path"]
36+
}
37+
if git["packedId"]:
38+
req["packedId"] = git["packedId"]
39+
yield req
40+
41+
def send(base, req):
42+
url = urljoin(base, "/workflows")
43+
r = requests.post(url, data=req, allow_redirects=False, headers=HEADERS)
44+
print("Posted: %s" % req)
45+
if r.status_code == 202:
46+
location = urljoin(url, r.headers["Location"])
47+
print(" queued: %s" % location)
48+
# need to check later
49+
return location
50+
if r.status_code == 303:
51+
print (" done: %s" % r.headers["Location"])
52+
return None # Already there, all OK
53+
print("Unhandled HTTP status code: %s %s" %
54+
(r.status_code, r.text))
55+
56+
def send_requests(base, requests):
57+
for req in requests:
58+
yield send(base, req)
59+
60+
def is_running(location):
61+
if not location:
62+
return True
63+
queued = requests.get(location, allow_redirects=False, headers=HEADERS)
64+
if queued.status_code == 303:
65+
# Done!
66+
return False
67+
j = queued.json()
68+
if j["cwltoolStatus"] == "RUNNING":
69+
return True
70+
elif j["cwltoolStatus"] == "ERROR":
71+
print("Failed %s: %s" % (location, j["message"]))
72+
return False
73+
else:
74+
raise Exception("Unhandled queue status: %s %s" % (queued.status_code, queued.text))
75+
76+
MAX_CONCURRENT=6 # Maximum number in queue
77+
SLEEP=0.5 # wait SLEEP seconds if queue is full
78+
79+
def trim_queue(queue):
80+
new_queue = []
81+
for q in queue:
82+
if is_running(q):
83+
#print("Still running %s" % q)
84+
new_queue.append(q)
85+
print("Trimmed queue from %s to %s" % (len(queue), len(new_queue)))
86+
return new_queue
87+
88+
def main(jsonfile="-", base="http://view.commonwl.org:8082/", *args):
89+
if jsonfile == "-":
90+
source = sys.stdin
91+
elif jsonfile.endswith(".gz"):
92+
source = gzip.open(jsonfile, "rb")
93+
else:
94+
source = open(jsonfile, "rb")
95+
96+
gitinfos = parse_gitinfos(source)
97+
if "--no-commits" in args:
98+
gitinfos = filter(is_not_commit, gitinfos)
99+
100+
requests = make_requests(gitinfos)
101+
queued = []
102+
for q in send_requests(base, requests):
103+
if q:
104+
queued.append(q)
105+
while len(queued) >= MAX_CONCURRENT:
106+
time.sleep(SLEEP)
107+
queued = trim_queue(queued)
108+
# Finish the rest of the queue
109+
while queued:
110+
queued = trim_queue(queued)
111+
112+
if __name__ == "__main__":
113+
if "-h" in sys.argv:
114+
print("load.py [jsonfile] [baseurl] [--no-commits]")
115+
sys.exit(1)
116+
117+
main(*sys.argv[1:])
118+

0 commit comments

Comments
 (0)