Skip to content

912 RegioStaR mapping #924

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions pycode/memilio-epidata/memilio/epidata/defaultDict.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,11 @@
'region_name': 'County',
'region_id': 'ID_County',
'desc': 'Description',
'incidence': 'Incidence'
'incidence': 'Incidence',
'values': 'Values',
'description': 'Description',
'variable': 'Variable',
'district': 'District'
}

GerEng = {
Expand Down Expand Up @@ -155,7 +159,11 @@
'ags5': EngEng['idCounty'],
'm_code': EngEng['npiCode'],
'code': EngEng['npiCode'],
'Bundesland_Id': EngEng['idState']
'Bundesland_Id': EngEng['idState'],
'Werte': EngEng['values'],
'Beschreibung': EngEng['description'],
'Variable': EngEng['variable'],
'gem_20': EngEng['district']
}

EsEng = {'fecha': EngEng['date'],
Expand Down
60 changes: 60 additions & 0 deletions pycode/memilio-epidata/memilio/epidata/geoModificationGermany.py
Original file line number Diff line number Diff line change
Expand Up @@ -632,3 +632,63 @@ def merge_df_counties_all(
df = merge_df_counties(df, key, val, sorting, columns, method)

return df


def regiostar_mapping(RegioStaR='7', count_multiple_entries=False):
    """! Maps RegioStaR categories to county IDs.

    Downloads the RegioStaR reference workbook, reads the value/description
    pairs of the requested RegioStaR variable from the sheet 'Codeplan' and
    collects, for every such value, the counties of all districts in the sheet
    'ReferenzGebietsstand2020' that carry this value.

    @param RegioStaR str or int RegioStaR classification, one of
        [2, 4, 17, 7 (default), 5, Gem7, Gem5]. Non-string input is converted
        with str().
    @param count_multiple_entries bool If True, additionally count for every
        county how many of its districts fall into each category and write
        this table with gd.write_dataframe to the 'Germany' subfolder of the
        default output folder. Default False.
    @return dict Maps each category description to the set of county IDs
        (strings, obtained by dropping the last three characters of the
        district key) that contain at least one district of that category.
    @throws IndexError if no row of the codeplan matches the requested
        RegioStaR variable.
    """
    RegioStaR = 'Regiostar' + str(RegioStaR)
    # column of the reference list holding the requested classification
    # (those columns are lower-cased further below)
    regio_col = RegioStaR.lower()

    col_variable = dd.EngEng['variable']
    col_values = dd.EngEng['values']
    col_description = dd.EngEng['description']
    col_district = dd.EngEng['district']

    # read both sheets; the context manager closes the workbook handle
    # as soon as the sheets have been parsed
    with pd.ExcelFile(
            'https://www.mcloud.de/downloads/mcloud/536149D1-2902-4975-9F7D-253191C0AD07/RegioStaR-Referenzdateien.xlsx',
            engine='openpyxl') as xlsx:
        codeplan = pd.read_excel(xlsx, sheet_name='Codeplan', header=4)
        codelist = pd.read_excel(xlsx, sheet_name='ReferenzGebietsstand2020')
    codeplan.rename(dd.GerEng, axis=1, inplace=True)
    codelist.rename(dd.GerEng, axis=1, inplace=True)

    # The section of a variable starts at the row naming it and continues
    # over all following rows whose variable cell is empty.
    variable_cells = codeplan[col_variable]
    idx_begin = np.where(variable_cells == RegioStaR)[0][0]
    idx_end = idx_begin + 1
    n_rows = len(codeplan)
    # stop at the end of the sheet so that a section located at the very end
    # of the codeplan does not index past the last row
    while idx_end < n_rows and pd.isnull(variable_cells.iloc[idx_end]):
        idx_end += 1
    # cut codeplan to the rows of the requested variable
    codeplan = codeplan.iloc[idx_begin:idx_end]
    regio_values = codeplan[col_values].values

    # keep the first nine column names as they are and lower-case the
    # remaining ones so that they can be addressed with regio_col
    new_cols = list(codelist.columns[:9])
    new_cols += [c.lower() for c in codelist.columns[9:]]
    codelist.columns = new_cols

    if count_multiple_entries:
        # zero-filled dataframe with countyIDs as rows and RegioStaR
        # descriptions as columns
        count_df = pd.DataFrame(
            0, columns=codeplan[col_description], index=dd.County.keys())

    RegioDict = dict()
    for v in regio_values:
        regio_desc = codeplan[codeplan[col_values] == v][
            col_description].values[0]
        subframe = codelist[codelist[regio_col] == v]
        # remove last 3 digits of gem_20 -> now represents CountyID
        county_ids = [str(d_id)[:-3] for d_id in subframe[col_district].values]
        if count_multiple_entries:
            for c_id in county_ids:
                count_df.at[int(c_id), regio_desc] += 1
        # every county is listed only once per category
        RegioDict[regio_desc] = set(county_ids)

    if count_multiple_entries:
        gd.write_dataframe(
            count_df,
            os.path.join(dd.defaultDict['out_folder'], 'Germany'),
            'multiple_entries_' + regio_col, 'txt')

    return RegioDict