def regiostar_mapping(RegioStaR='7', count_multiple_entries=False):
    """! Maps the categories of a RegioStaR variable to county IDs.

    Reads the RegioStaR reference workbook, extracts from the sheet
    'Codeplan' all values (and their descriptions) that belong to the
    requested RegioStaR variable and collects, for every value, the counties
    of all districts listed with that value in the sheet
    'ReferenzGebietsstand2020'.

    The German column headers of the workbook are translated with dd.GerEng;
    this function relies on the mappings 'Werte' -> dd.EngEng['values'],
    'Beschreibung' -> dd.EngEng['description'],
    'Variable' -> dd.EngEng['variable'] and 'gem_20' -> dd.EngEng['district'].

    @param RegioStaR str One of [2,4,17,7(default),5,Gem7,Gem5]. Values that
        are not strings are converted with str().
    @param count_multiple_entries bool If True, additionally count for every
        county how many of its districts fall into each category and pass
        the resulting table to gd.write_dataframe (file name
        'multiple_entries_<variable name in lower case>', file type 'txt',
        directory <out_folder>/Germany). Default: False.
    @return Dict with the category descriptions as keys and, as values, the
        sets of county IDs (strings: district ID without its last three
        characters) that contain at least one district of that category.
    """
    if not isinstance(RegioStaR, str):
        RegioStaR = str(RegioStaR)
    RegioStaR = 'Regiostar'+RegioStaR
    # column of the reference sheet holding the requested variable; column
    # names from the tenth column on are converted to lower case below
    regio_col = RegioStaR.lower()

    # read both sheets and release the workbook handle afterwards
    with pd.ExcelFile(
            'https://www.mcloud.de/downloads/mcloud/536149D1-2902-4975-9F7D-253191C0AD07/RegioStaR-Referenzdateien.xlsx',
            engine='openpyxl') as xlsx:
        codeplan = pd.read_excel(xlsx, sheet_name='Codeplan', header=4)
        codelist = pd.read_excel(xlsx, sheet_name='ReferenzGebietsstand2020')
    codeplan.rename(dd.GerEng, axis=1, inplace=True)
    codelist.rename(dd.GerEng, axis=1, inplace=True)

    # get start and end point of regiostar values: the section starts at the
    # row naming the variable and continues while the variable cell is empty
    variable_col = codeplan[dd.EngEng['variable']]
    idx_begin = np.where(variable_col == RegioStaR)[0][0]
    idx_end = idx_begin + 1
    n_rows = len(variable_col)
    # stop at the end of the sheet as well, otherwise the last section of the
    # code plan would be scanned beyond the final row
    while idx_end < n_rows and pd.isnull(variable_col.iloc[idx_end]):
        idx_end += 1
    # cut codeplan
    codeplan = codeplan.iloc[idx_begin:idx_end]
    # save in array
    regio_values = codeplan[dd.EngEng['values']].values
    # save them as dict
    RegioDict = dict()

    # keep the first nine column names, use lower case for all others
    new_cols = list(codelist.columns[:9])
    new_cols += [c.lower() for c in codelist.columns[9:]]
    codelist.columns = new_cols

    if count_multiple_entries:
        # create zero-filled dataframe with countyIDs as rows and
        # RegioStaR descriptions as columns
        count_df = pd.DataFrame(
            0, columns=codeplan[dd.EngEng['description']],
            index=dd.County.keys())

    for v in regio_values:
        regio_desc = codeplan[codeplan[dd.EngEng['values']]
                              == v][dd.EngEng['description']].values[0]
        subframe = codelist[codelist[regio_col] == v]
        # remove last 3 digits of gem_20 -> now represents CountyID
        district_ids = subframe[dd.EngEng['district']].values
        county_ids = [str(d_id)[:-3] for d_id in district_ids]
        if count_multiple_entries:
            for c_id in county_ids:
                count_df.at[int(c_id), regio_desc] += 1
        # write into dict (every county only once)
        RegioDict[regio_desc] = set(county_ids)

    if count_multiple_entries:
        gd.write_dataframe(count_df, os.path.join(
            dd.defaultDict['out_folder'], 'Germany'),
            'multiple_entries_'+regio_col, 'txt')

    return RegioDict