#!/usr/bin/env bash
# Publish the JUMP datasets index to Zenodo when its contents have changed.
#
# Steps:
#   1. Check that every ETag recorded in FILE_TO_VERSION matches the ETag
#      currently served for the URL on the same row.
#   2. Compare the local file with the file(s) attached to the latest Zenodo
#      version of ORIGINAL_ID; exit 0 when the md5 sums are identical.
#   3. Otherwise create a new deposition (or a new version of the existing
#      one), upload the file and its metadata, and publish it.
#
# Environment:
#   ZENODO_TOKEN  Zenodo access token (required).
#
# Side effects: writes metadata.json in the current directory.
# Requires: curl, jq, awk, md5sum.

# -u: fail on unset variables; pipefail: a pipeline fails if any stage fails.
# -e is deliberately not used; critical steps are checked explicitly instead.
set -uo pipefail

# Find the latest version of the dataset
readonly ORIGINAL_ID="13892061"
readonly FILE_TO_VERSION="manifests/profile_index.csv"
readonly METADATA_JSON='{
  "metadata": {
    "title": "The Joint Undertaking for Morphological Profiling (JUMP) Consortium Datasets Index",
    "creators": [
      {
        "name": "The JUMP Cell Painting Consortium"
      }
    ],
    "upload_type": "dataset",
    "access_right": "open"
  }
}'

readonly ZENODO_ENDPOINT="https://zenodo.org"
readonly DEPOSITION_PREFIX="${ZENODO_ENDPOINT}/api/deposit/depositions"

readonly FILENAME=${FILE_TO_VERSION##*/}

#######################################
# Print a message to stderr and abort the script with status 1.
# Arguments: $* - message
#######################################
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Print the 1-based index of the first header field containing a substring.
# Arguments: $1 - substring to look for, $2 - CSV file
# Returns:   non-zero when no header field contains the substring
#######################################
find_column() {
  local needle=$1
  local file=$2
  awk -F',' -v needle="${needle}" '
    NR == 1 {
      for (i = 1; i <= NF; i++) {
        if (index($i, needle)) { print i; found = 1; break }
      }
      exit
    }
    END { exit !found }' "${file}"
}

#######################################
# Print the ETag served for a URL (HEAD request), without quotes or CR.
# Prints nothing when the request fails or no ETag header is returned.
# Arguments: $1 - URL
#######################################
fetch_etag() {
  # awk consumes the whole response (no early exit) so curl never sees a
  # closed pipe; only the first ETag header is printed.
  curl -I --silent "$1" |
    awk 'tolower($1) == "etag:" && !seen { print $2; seen = 1 }' |
    tr -d '\r"'
}

#######################################
# Abort unless every row's recorded ETag equals the one served for its URL.
# Arguments: $1 - CSV file with a header row naming "url" and "etag" columns
# Outputs:   one line per mismatching URL on stderr
#######################################
verify_etags() {
  local file=$1
  local url_column etag_column url local_etag remote_etag
  local checked=0
  local mismatched=0

  url_column=$(find_column "url" "${file}") ||
    die "No 'url' column found in ${file}"
  etag_column=$(find_column "etag" "${file}") ||
    die "No 'etag' column found in ${file}"

  # Emit "url<TAB>etag" per data row; drop a trailing CR (CRLF files) and the
  # surrounding double quotes, and skip entirely blank lines.
  while IFS=$'\t' read -r url local_etag; do
    checked=$((checked + 1))
    remote_etag=$(fetch_etag "${url}")
    # An empty remote ETag means the request failed: never treat it as a match.
    if [[ -z "${remote_etag}" || "${remote_etag}" != "${local_etag}" ]]; then
      printf 'ETag mismatch for %s: remote "%s" vs local "%s"\n' \
        "${url}" "${remote_etag}" "${local_etag}" >&2
      mismatched=$((mismatched + 1))
    fi
  done < <(awk -F',' -v url_col="${url_column}" -v etag_col="${etag_column}" '
    NR > 1 {
      sub(/\r$/, "")
      if ($0 == "") next
      url = $url_col
      etag = $etag_col
      gsub(/^"|"$/, "", url)
      gsub(/^"|"$/, "", etag)
      print url "\t" etag
    }' "${file}")

  if ((checked == 0)); then
    die "No data rows found in ${file}"
  fi
  echo "Checked ${checked} ETags, ${mismatched} mismatched"
  if ((mismatched > 0)); then
    die "At least one ETag does not match their url."
  fi
}

main() {
  local latest_id remote_hash local_hash
  local deposition_endpoint deposition bucket_data bucket
  local new_deposition_endpoint

  [[ -r "${FILE_TO_VERSION}" ]] || die "Cannot read ${FILE_TO_VERSION}"

  echo "Checking that S3 ETags match their local counterpart"
  verify_etags "${FILE_TO_VERSION}"

  if [[ -z "${ZENODO_TOKEN:-}" ]]; then # Check Zenodo Token
    die "Access token not available"
  fi
  echo "Access token found."

  if [[ -z "${ORIGINAL_ID}" ]]; then # Only get latest id when provided an original one
    echo "Creating new deposition"
    deposition_endpoint="${DEPOSITION_PREFIX}"
  else # Update existing dataset
    echo "Previous ID Exists"
    # The /latest page links to the newest version; take the id from its href.
    latest_id=$(curl "${ZENODO_ENDPOINT}/records/${ORIGINAL_ID}/latest" |
      grep records | sed 's/.*href=".*\.org\/records\/\(.*\)".*/\1/')
    # Guard against a failed request or a changed page layout.
    [[ "${latest_id}" =~ ^[0-9]+$ ]] ||
      die "Could not determine the latest record id (got '${latest_id}')"

    remote_hash=$(curl -H "Content-Type: application/json" -X GET --data "{}" \
      "${DEPOSITION_PREFIX}/${latest_id}/files?access_token=${ZENODO_TOKEN}" |
      jq ".[] .links .download" | xargs curl | md5sum | cut -f1 -d" ") ||
      die "Could not download the published file(s) of record ${latest_id}"
    local_hash=$(md5sum "${FILE_TO_VERSION}" | cut -f1 -d" ")

    echo "Checking for changes in file contents: Remote ${remote_hash} vs Local ${local_hash}"
    if [[ "${remote_hash}" == "${local_hash}" ]]; then
      echo "The urls and md5sums have not changed"
      exit 0
    fi

    echo "Creating new version"
    deposition_endpoint="${DEPOSITION_PREFIX}/${latest_id}/actions/newversion"
  fi

  # Create new deposition
  deposition=$(curl -H "Content-Type: application/json" \
    -X POST --data "{}" \
    "${deposition_endpoint}?access_token=${ZENODO_TOKEN}" |
    jq .id)
  # jq prints "null" when the response carries no id (e.g. an API error).
  if [[ -z "${deposition}" || "${deposition}" == "null" ]]; then
    die "Could not create the deposition via ${deposition_endpoint}"
  fi
  echo "New deposition ID is ${deposition}"

  # Variables
  bucket_data=$(curl "${DEPOSITION_PREFIX}/${deposition}?access_token=${ZENODO_TOKEN}")
  bucket=$(printf '%s\n' "${bucket_data}" | jq --raw-output .links.bucket)

  if [[ -z "${bucket}" || "${bucket}" == "null" ]]; then
    echo "Could not find URL for upload. Response from server:"
    printf '%s\n' "${bucket_data}"
    exit 1
  fi

  # Upload file
  echo "Uploading file ${FILE_TO_VERSION} to bucket ${bucket}"
  cat "${FILE_TO_VERSION}"
  curl -o /dev/null \
    --upload-file "${FILE_TO_VERSION}" \
    "${bucket}/${FILENAME}?access_token=${ZENODO_TOKEN}" ||
    die "Upload of ${FILE_TO_VERSION} failed"

  # Upload Metadata
  # printf, not echo -e: the JSON must be written verbatim.
  printf '%s\n' "${METADATA_JSON}" >metadata.json

  new_deposition_endpoint="${DEPOSITION_PREFIX}/${deposition}"
  echo "Uploading file to ${new_deposition_endpoint}"
  curl -H "Content-Type: application/json" \
    -X PUT \
    --data @metadata.json \
    "${new_deposition_endpoint}?access_token=${ZENODO_TOKEN}" ||
    die "Metadata upload failed"

  # Publish
  echo "Publishing to ${new_deposition_endpoint}"
  curl -H "Content-Type: application/json" \
    -X POST \
    --data "{}" \
    "${new_deposition_endpoint}/actions/publish?access_token=${ZENODO_TOKEN}" |
    jq .id
}

main "$@"