Source code for datatoolbox.tools.pandas
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Dec 22 09:34:50 2020
@author: ageiges
"""
import numpy as np
from datatoolbox.util import identifyCountry
from datatoolbox import mapping as mapp
from pandas_indexing import isin
def addCountryNames(table, as_index=False):
names = list()
for idx in table.index:
if idx in mapp.countries.codes.index:
names.append(mapp.countries.codes.loc[idx, "name"])
else:
names.append(idx)
if as_index:
table.index = names
else:
table.loc[:, "country_name"] = names
return table
def convertIndexToISO(table, iso_type="alpha3"):
from datatoolbox.util import identifyCountry
"""
Convert index of a dataframe into iso codes.
Parameters
----------
table : pandas.Dataframe or dt.DataTable
Index of thos table consists of country strings.
iso : TYPE, optional
Either 'alpha3', alpha2 or numISO. The default is 'alpha3'.
Returns
-------
table : pandas.Dataframe or dt.DataTable
Return old dataframe with new iso index.
"""
replaceDict = dict()
for idx in table.index:
iso = identifyCountry(idx)
if iso is not None:
replaceDict[idx] = iso
table.index = table.index.map(replaceDict)
table = table.loc[~table.index.isna(), :]
if iso_type == "alpha2":
table.index = mapp.countries.codes.loc[table.index, "alpha2"]
elif iso_type == "numISO":
table.index = mapp.countries.codes.loc[table.index, "numISO"].astype(int)
return table
[docs]
def add_standard(table, iso_type="alpha3"):
"""
Convert index of a dataframe into iso codes.
Parameters
----------
table : pandas.Dataframe or dt.DataTable
Index of thos table consists of country strings.
iso : TYPE, optional
Either 'alpha3', alpha2 or numISO. The default is 'alpha3'.
Returns
-------
table : pandas.Dataframe or dt.DataTable
Return old dataframe with new iso index.
"""
replaceDict = dict()
for idx in table.index:
iso = identifyCountry(idx)
if iso is not None:
replaceDict[idx] = iso
table.index = table.index.map(replaceDict)
table = table.loc[~table.index.isna(), :]
if iso_type == "alpha2":
table.index = mapp.countries.codes.loc[table.index, "alpha2"]
elif iso_type == "numISO":
table.index = mapp.countries.codes.loc[table.index, "numISO"].astype(int)
return table
[docs]
def yearsColumnsOnly(index):
"""
Extracts from any given index only the index list that can resemble
as year
e.g. 2001
"""
import re
REG_YEAR = re.compile("^[0-9]{4}$")
newColumns = []
for col in index:
if REG_YEAR.search(str(col)) is not None:
newColumns.append(col)
else:
try:
if ~np.isnan(col) and REG_YEAR.search(str(int(col))) is not None:
# test float string
newColumns.append(col)
except Exception:
pass
return newColumns
def index_availablity(dataframe, index_list):
available_idx = dataframe.index.intersection(index_list)
missing_idx = set(index_list).difference(available_idx)
return available_idx, missing_idx