@@ -1,6 +1,8 @@
 import os
 import requests
 import pandas as pd
+import re
+import json
 
 from bs4 import BeautifulSoup
 
@@ -27,6 +29,20 @@ def get_link():
     soup = BeautifulSoup(response.text, 'html.parser')
     link = soup.find_all('a', {'class': 'mtli_attachment'})
     link = link[0].get('href')
+    # Extract the date using regex
+    date_match = re.search(r'v(\d{8})\.xlsx', link)
+    if date_match:
+        new_version = date_match.group(1)
+        dpfile = 'datapackage.json'
+        with open(dpfile, 'r', encoding='utf-8') as f:
+            datapackage = json.load(f)
+
+        old_version = datapackage.get('version', 'unknown')
+        datapackage['version'] = new_version
+
+        with open(dpfile, 'w', encoding='utf-8') as f:
+            json.dump(datapackage, f, indent=2, ensure_ascii=False)
+
     return link
 
 def retrieve_content(link):
@@ -49,7 +65,7 @@ def convert_dms(dms):
     degrees = int(dms_values[0])
     minutes = int(dms_values[1])
     seconds = float(dms_values[2])
-
+
     return dms_to_decimal(degrees, minutes, seconds, direction)
 
 def transform_csv():
@@ -76,4 +92,4 @@ def clean_up():
 link = get_link()
 retrieve_content(link)
 transform_csv()
-clean_up()
+clean_up()