# Now list the countries, and the number of missing values per survey.
# `cb` is indexed by country and is read here as the number of surveys for
# that country (presumably a per-country count of rows in `final` - confirm
# against the cell that builds it).
for country in final.Country.unique():
    n_surveys = cb[cb.index == country].values[0]
    print("{} has {} surveys".format(country, n_surveys))
    # Filter once per country rather than twice per printed survey line.
    country_rows = final[final.Country == country]
    # Re-number from 0 so that position i below addresses the i-th survey row.
    survey_labels = country_rows.Survey.reset_index(drop=True)
    missing_per_survey = country_rows.isna().sum(axis=1).reset_index(drop=True)
    for i in range(n_surveys):
        print("Survey {} of {} has {} missing values".format(i + 1, survey_labels[i], missing_per_survey[i]))
# It is of interest to see the countries that have all data missing for particular indicators across all of their surveys (in other words, to list the indicators for which a country has no data at all)
# Again, this is the number of missing values per indicator per country (the number of surveys per country for which the indicator has a missing value)
# We see all 0s - this means every country has at least 1 value for each indicator across its surveys
```
%% Output
Country
Angola 0
Benin 0
Burkina Faso 0
Burundi 0
Cameroon 0
Chad 0
Congo 0
Congo Democratic Republic 0
Cote d'Ivoire 0
Ethiopia 0
Gabon 0
Gambia 0
Ghana 0
Kenya 0
Lesotho 0
Liberia 0
Malawi 0
Mali 0
Mozambique 0
Namibia 0
Niger 0
Nigeria 0
Rwanda 0
Senegal 0
Sierra Leone 0
Togo 0
Uganda 0
Zambia 0
Zimbabwe 0
dtype: int64
%% Cell type:code id: tags:
``` python
# List the countries and indicators where they have the most missing values.
# min_surveys restricts the listing to countries with more than min_surveys
# surveys, i.e. at least min_surveys + 1 rows in `final`.
min_surveys = 2
# indic_difference is the exact number of non-missing values to look for,
# i.e. if indic_difference is 1 and min_surveys is 2, then this code lists the
# countries that have at least 3 surveys, and the indicators for which there is
# only 1 non-missing value across those surveys.
indic_difference = 1
for country in final.Country.unique():
    # Filter once per country instead of re-filtering for every column.
    country_rows = final[final.Country == country]
    n_surveys = country_rows.shape[0]
    if n_surveys <= min_surveys:
        continue
    # Missing-value count per column, indexed by column name. Iterating the
    # (name, count) pairs avoids integer-position lookups on a label-indexed
    # Series, which pandas has deprecated.
    missing_per_column = country_rows.isna().sum()
    for column, n_missing in missing_per_column.items():
        if n_missing == n_surveys - indic_difference:
            print(country, column)
```
%% Output
Lesotho Number.of.co.wives.0
Lesotho Number.of.co.wives.1
Lesotho Number.of.co.wives.2
Zimbabwe Knowledge.about.AIDS.W
Zimbabwe Knowledge.about.AIDS.M
%% Cell type:markdown id: tags:
## 2. Imputing the missing values
%% Cell type:code id: tags:
``` python
# Remove any unwanted columns before trying imputation