import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Use matplotlib's 'ggplot' style for the figures produced below
plt.style.use('ggplot')
# Load the aggregated data file
df = pd.read_csv("../data/data_aggregate_PDL.csv")
def computePercentage(cols, colref):
    """Return the row-wise sum of ``df[cols]`` divided by ``df[colref]``.

    The function reads the module-level DataFrame ``df``. The result is a
    ratio; it is not multiplied by 100.

    cols: column labels whose values are summed along each row.
    colref: label of the column used as the denominator.
    """
    numerator = df[cols].sum(axis=1)
    denominator = df[colref]
    return numerator / denominator
#Traitement des colonnes
def findCols(df, s):
    """Return the names of the columns of ``df`` that contain the substring ``s``.

    The order of ``df.columns`` is preserved.
    """
    return [name for name in df.columns if s in name]
# NOTE(review): `cols` is reassigned further down before this result is used anywhere visible — looks like an exploratory lookup; confirm it can be removed.
cols = findCols(df, 'NIV_')
def findEtudeNCols(df, etude, s):
    """Return the columns of one study (name prefix) that contain ``s``.

    A column is kept when the part of its name before the first underscore
    equals ``etude`` and the full name contains the substring ``s``.
    The order of ``df.columns`` is preserved.
    """
    return [
        name
        for name in df.columns
        if name.split("_")[0] == etude and s in name
    ]
# NOTE(review): `cols` is reassigned a few lines below before this result is used anywhere visible — looks like an exploratory lookup; confirm it can be removed.
cols = findEtudeNCols(df, 'POP2', 'NA38AZ')
# Share of unemployed people in the population (ratio, not multiplied by 100)
df['p_chom'] = (df['P17_CHOM1564']/df['P17_POP'])
# Share of people aged 65 and over: both 'POP5_SEXE*_AGEQ65065' column groups over 'P17_POP'
cols = findCols(df, 'POP5_SEXE1_AGEQ65065') + findCols(df, 'POP5_SEXE2_AGEQ65065')
df['p_65+'] = computePercentage(cols, 'P17_POP')
# Share of 15-29 year olds: the three 'ACT2A_AGEQ650{15,20,25}_SEXE' column groups over 'P17_POP'
cols = findCols(df, 'ACT2A_AGEQ65015_SEXE') + findCols(df, 'ACT2A_AGEQ65020_SEXE') + findCols(df, 'ACT2A_AGEQ65025_SEXE')
df['p_15_29'] = computePercentage(cols, 'P17_POP')
# Number of households per commune (row-wise sum of the columns containing 'MEN1_NPERC')
cols = findCols(df, 'MEN1_NPERC')
df['nb_menages'] = df[cols].sum(axis = 1)
# Share of one-person households
# NOTE(review): substring match — 'MEN1_NPERC1' would also select a column such as 'MEN1_NPERC10...' if one exists; verify against the data.
cols = findCols(df, 'MEN1_NPERC1')
df['p_foyer1pers'] = computePercentage(cols, 'nb_menages')
# Number of families per commune
# NOTE(review): this selects the same 'MEN1_NPERC' columns as nb_menages, so nb_familles equals nb_menages — confirm which columns were intended.
cols = findCols(df, 'MEN1_NPERC')
df['nb_familles'] = df[cols].sum(axis = 1)
# Share of single-parent families
# NOTE(review): same numerator columns ('MEN1_NPERC1') and same denominator values as p_foyer1pers, so p_fammono duplicates it — confirm the intended columns.
cols = findCols(df, 'MEN1_NPERC1')
df['p_fammono'] = computePercentage(cols, 'nb_familles')
# Number of people in the sample (row-wise sum of the columns containing 'FOR2_AGEQ650')
cols = findCols(df, 'FOR2_AGEQ650')
df['nb_catdipl'] = df[cols].sum(axis = 1)
# Share of people with no or low qualifications ('DIPL_19A', 'DIPL_19B' and 'DIPL_19C' columns)
cols = findCols(df, 'DIPL_19A') + findCols(df, 'DIPL_19B') + findCols(df, 'DIPL_19C')
filters = ['AGEQ65015'] # substring identifying the under-20 columns, which are excluded
cols = [s for s in cols if not any(xs in s for xs in filters)]
# NOTE(review): the denominator nb_catdipl is not filtered on 'AGEQ65015' the way the numerator is — confirm this is intended.
df['p_faible_dipl'] = computePercentage(cols, 'nb_catdipl')
# Share of people aged 75 and over; the denominator 'MEN3' is the row-wise sum of all columns containing 'AGEQ80_14'
df['MEN3'] = df[findCols(df, 'AGEQ80_14')].sum(axis=1)
cols = findCols(df, 'AGEQ80_14075') + findCols(df, 'AGEQ80_14080')
df['p_75+'] = computePercentage(cols, 'MEN3')
# Share of people aged 60-74 (columns 'AGEQ80_14060', '..065' and '..070'; the original comment said 65-74)
cols = findCols(df, 'AGEQ80_14060') + findCols(df, 'AGEQ80_14065') + findCols(df, 'AGEQ80_14070')
df['p_60_74'] = computePercentage(cols, 'MEN3')
# Share of people over 20 without any qualification
# NOTE(review): the [2:] slice assumes the first two 'DIPL_19' columns (in file order) are the under-20 ones — verify against the data.
cols = findCols(df, 'DIPL_19')[2:] # intended to keep only the over-20s
df['DIPL_20+'] = df[cols].sum(axis=1)
# NOTE(review): this numerator takes every 'DIPL_19A' column without the slice applied to the denominator — confirm no under-20 column is included.
cols = findCols(df, 'DIPL_19A')
df['p_0dipl'] = computePercentage(cols, 'DIPL_20+')
# Bare expression: displays the column when run as a notebook cell
df['p_0dipl']
# Share of people over 20 with a low qualification ('DIPL_19B' and 'DIPL_19C' columns)
cols = findCols(df, 'DIPL_19B') + findCols(df, 'DIPL_19C')
df['p_faibldipl'] = computePercentage(cols, 'DIPL_20+')
# Socio-professional categories: share of each category within the
# 'CS1_8*' columns of the POP6 study, rounded to 4 decimals.
colsRef = findEtudeNCols(df, 'POP6', 'CS1_8')
_csp_total = df[colsRef].sum(axis=1)
for _target, _code in (('p_ouv', 'CS1_86'), ('p_empl', 'CS1_85'), ('p_ind', 'CS1_82')):
    cols = findEtudeNCols(df, 'POP6', _code)
    df[_target] = (df[cols].sum(axis=1) / _csp_total).round(4)
# CAF data
# Ratio of the 'PRES_TR50PFRB' / 'PRES_TR100PFRB' columns to the population 'P17_POP'
df['p_TR50'] = df['PRES_TR50PFRB'] /df['P17_POP']
df['p_TR100'] = df['PRES_TR100PFRB'] /df['P17_POP']
# Share of workers / farmers; the denominator is the row-wise sum of the columns containing 'POP6_CS1_8'
cols = findCols(df, 'POP6_CS1_8')
df['POP6CS1_20+'] = df[cols].sum(axis=1)
# NOTE(review): these lookups are plain substring matches, not restricted to the POP6 study — verify no column of another study contains 'CS1_81' / 'CS1_86'.
cols = findCols(df, 'CS1_81') + findCols(df, 'CS1_86')
df['p_ouvragri'] = computePercentage(cols, 'POP6CS1_20+')
# Share of self-employed people (stored in 'p_indé', a different column from 'p_ind')
cols = findCols(df, 'CS1_82')
df['p_indé'] = computePercentage(cols, 'POP6CS1_20+')
# Data correction
# NOTE(review): overrides 'p_ind' for the row labelled 735 with a hard-coded 0.15; the reason is not visible here — confirm and document.
df.at[735,'p_ind'] = 0.15
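`cols` est une liste de listes contenant, pour chaque indicateur : le nom de la colonne de `df`, l'orientation (`'+'` ou `'-'`), le libellé utilisé comme titre des graphiques et le coefficient de pondération dans le score global.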
# Indicators entering the score. Each item is
# [column name in df, orientation ('+' or '-'), label used as plot title, weight in the global score].
# NOTE(review): the label of 'p_foyer1pers' mentions single-parent families, but that column only holds the one-person household share — confirm the wording.
cols = [['p_75+', '+', 'Score +75 ans', 3],
['p_60_74', '+', 'Score 60-74 ans', 2],
['p_15_29', '+', 'Score 15-29ans', 2],
['p_0dipl', '+', 'Score aucun diplome', 3],
['p_faibldipl', '+', 'Score faible diplôme', 2],
['MED17', '-', 'Score revenus\nmedian faible', 1],
['p_foyer1pers', '+', 'Score familles\nmonoparentales\n& foyer 1 pers', 2],
['p_ind', '+', 'Score d\'independants', 1],
['p_empl', '+', 'Score d\'employés', 1],
['p_ouv', '+', 'Score d\'ouvriers', 3]]
def computeScoreWithNA(df, cols):
    """Return a DataFrame of per-indicator scores relative to each column's mean.

    The result starts from ``df.CODGEO`` and gets one ``<variable>_score``
    column per item of ``cols``, rounded to one decimal:

    * orientation ``'+'``: ``((value - mean) / mean + 1) * 100``
    * orientation ``'-'``: ``((mean - value) / mean + 1) * 100``

    so a value equal to the column mean scores 100. Missing values stay
    missing in the score; pandas' ``mean`` skips them by default, so they do
    not affect the other rows.

    df: DataFrame holding a ``CODGEO`` column and every indicator column.
    cols: iterable of items whose first element is the indicator column name
        and whose second element is the orientation (``'+'`` or ``'-'``).

    Raises ValueError if an orientation is neither ``'+'`` nor ``'-'``.
    """
    dfT = pd.DataFrame(df.CODGEO)
    for col in cols:
        variable, orientation = col[0], col[1]
        vals = df[variable]
        mean = vals.mean()
        if orientation == '+':
            deviation = (vals - mean) / mean
        elif orientation == '-':
            deviation = (mean - vals) / mean
        else:
            # Fail loudly instead of leaving the score column undefined.
            raise ValueError(
                "unknown orientation %r for variable %r (expected '+' or '-')"
                % (orientation, variable)
            )
        dfT[variable + '_score'] = ((deviation + 1) * 100).round(1)
    return dfT
# Compute the per-indicator scores for every item of `cols`
dfS = computeScoreWithNA(df, cols)
# Missing values of the median-income score are replaced by the median of that score
dfS['MED17_score'] = dfS['MED17_score'].fillna(dfS['MED17_score'].median())
#Normalisation des colonnes de scores sur 0-100
def norm(s):
    """Min-max rescale the series ``s`` to the 0-100 range, rounded to one decimal."""
    lowest = s.min()
    highest = s.max()
    rescaled = 100 * (s - lowest) / (highest - lowest)
    return rescaled.round(1)
# Add a 0-100 normalised copy ('<name>_norm') of every column whose name
# contains '_score'. The column list is taken before any column is added.
_score_columns = [name for name in dfS.columns if '_score' in name]
for _name in _score_columns:
    dfS[_name + '_norm'] = norm(dfS[_name])
# Keep only the normalised columns: drop the raw '*_score' ones
_raw_columns = [name for name in dfS.columns if name.endswith('_score')]
dfS = dfS.drop(_raw_columns, axis=1)
# Weighted deprivation score on a 0-100 scale: weighted mean of the
# normalised indicator scores, using the weight stored in each item of `cols`.
coef_sum = 0
dfS['score'] = 0
for _item in cols:
    coef = int(_item[3])
    coef_sum += coef
    dfS['score'] += dfS[_item[0] + '_score_norm'] * coef
dfS['score'] = (dfS['score'] / coef_sum).round(1)
# NOTE(review): the row labelled 820 is overwritten with the mean score; the
# reason is not visible here (presumably a missing value) — confirm.
dfS.at[820,'score'] = dfS['score'].mean()
# Rescale the weighted score itself to 0-100
s = norm(dfS['score'])
dfS['score'] = s
# Add commune-level information (values are aligned with df on the row index)
dfS['CODGEO'] = dfS['CODGEO'].astype(int)
dfS['Commune'] = df['LIBGEO_x']
dfS['Département'] = df['DEP_x'].astype(int)
#dfS['Dynamique Démographique'] = df['Dynamique Démographique BV']
# Strip the " en croissance démographique" suffix from the environment label
dfS['Environnement Démographique'] = df['Environnement Démographique'].str.replace(" en croissance démographique", "")
# Reorder the columns: identification columns, global score, then the per-indicator scores
dfS = dfS[['CODGEO', 'Commune', 'Département',
'Environnement Démographique', 'score', 'p_60_74_score_norm', 'p_75+_score_norm',
'p_15_29_score_norm', 'p_0dipl_score_norm', 'p_faibldipl_score_norm',
'MED17_score_norm', 'p_foyer1pers_score_norm', 'p_ind_score_norm', 'p_empl_score_norm','p_ouv_score_norm'
]]
# Rename every '*_score_norm' column to a short 'score_<indicator>' name
_renames = {}
for col in dfS.columns:
    if "_score_norm" not in col:
        continue
    _short = col.replace("_score_norm", "")
    _short = _short.replace("p_", "")
    _short = _short.replace("MED17", "revmed")
    _short = _short.replace('foyer1pers', 'foyer1')
    _renames[col] = 'score_' + _short
dfS = dfS.rename(columns=_renames)
# Bare expression: displays the resulting column index when run as a notebook cell
dfS.columns
Index(['CODGEO', 'Commune', 'Département', 'Environnement Démographique', 'score', 'score_60_74', 'score_75+', 'score_15_29', 'score_0dipl', 'score_faibldipl', 'score_revmed', 'score_foyer1', 'score_ind', 'score_empl', 'score_ouv'], dtype='object')
def exportDatatable(df, filename):
    """Write ``df`` to ``filename`` as an HTML table driven by jQuery DataTables.

    The table is rendered with ``DataFrame.to_html`` (no index, cell content
    not HTML-escaped), given the id ``example`` and the class ``display``,
    and preceded by the script/style tags that load jQuery and DataTables
    from their CDNs and initialise the table (50 rows per page, French UI).

    df: DataFrame to export.
    filename: path of the HTML file to create or overwrite.
    """
    # NOTE: escape=False means cell values are inserted as raw HTML.
    table_html = df.to_html(index=False, escape=False)
    table_html = (
        table_html
        .replace('<th>', '<th class = "th-sm" style="text-align: left">')
        .replace('border="1"', 'id="example"')
        .replace('class="dataframe"', 'class="display"')
    )
    header = '''
<script type="text/javascript" src="https://code.jquery.com/jquery-3.3.1.js"></script>
<script type="text/javascript" src="https://cdn.datatables.net/1.10.20/js/jquery.dataTables.min.js"></script>
<link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/1.10.20/css/jquery.dataTables.min.css"/>
<script>
$(document).ready(function() {
$('#example').DataTable( {
"pageLength": 50,
"language": {"url": "https://cdn.datatables.net/plug-ins/1.10.24/i18n/French.json"}
} );
} );</script>
'''
    # Explicit UTF-8 so accented names are written identically on every
    # platform; the context manager closes the file even if the write fails.
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(header + table_html)
# Export the full score table as an HTML DataTable and as a tab-separated CSV
exportDatatable(dfS, './tables/scores.html')
dfS.to_csv('./scores.csv', sep='\t', index=False)
# One HTML table per département, without the 'Département' column
for dep in (44, 49, 53, 72, 85):
    _in_dep = dfS['Département'] == dep
    dfD = dfS[_in_dep].drop(columns='Département')
    exportDatatable(dfD, f'./tables/scores_{dep}.html')
CODGEO | Commune | Département | Environnement Démographique | score | score_60_74 | score_75+ | score_15_29 | score_0dipl | score_faibldipl | score_revmed | score_foyer1 | score_ind | score_empl | score_ouv | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
980 | 85001 | L'Aiguillon-sur-Mer | 85 | Bassin Résidentiel | 82.5 | 58.0 | 65.9 | 6.1 | 53.6 | 42.3 | 64.6 | 76.1 | 21.3 | 15.3 | 14.5 |
981 | 85002 | L'Aiguillon-sur-Vie | 85 | Bassin Résidentiel | 68.4 | 37.7 | 34.7 | 25.7 | 35.8 | 44.6 | 65.2 | 58.3 | 24.6 | 36.7 | 38.8 |
982 | 85003 | Aizenay | 85 | Bassin Industriel | 50.9 | 30.5 | 22.3 | 31.4 | 36.5 | 32.6 | 59.3 | 47.5 | 22.4 | 36.7 | 36.2 |
983 | 85004 | Angles | 85 | Bassin Résidentiel | 64.9 | 69.9 | 41.5 | 13.2 | 42.0 | 48.9 | 57.7 | 61.5 | 15.0 | 25.5 | 10.6 |
984 | 85005 | Antigny | 85 | Bassin Industriel | 69.8 | 38.4 | 25.3 | 34.0 | 43.3 | 40.7 | 69.5 | 44.3 | 26.4 | 31.0 | 49.4 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1233 | 85303 | Vix | 85 | Bassin Résidentiel | 74.4 | 32.9 | 37.0 | 25.2 | 51.7 | 39.0 | 72.1 | 56.8 | 31.6 | 28.6 | 38.3 |
1234 | 85304 | Vouillé-les-Marais | 85 | Bassin Résidentiel | 56.5 | 43.7 | 23.7 | 16.7 | 43.7 | 40.9 | 80.4 | 52.9 | 14.8 | 29.2 | 28.5 |
1235 | 85305 | Vouvant | 85 | Bassin Résidentiel | 61.4 | 40.4 | 38.4 | 14.6 | 46.1 | 27.3 | 66.6 | 50.1 | 37.4 | 27.0 | 32.8 |
1236 | 85306 | Xanton-Chassenon | 85 | Bassin Résidentiel | 37.5 | 28.8 | 6.9 | 24.7 | 34.8 | 37.0 | 65.7 | 31.6 | 20.9 | 49.1 | 35.5 |
1237 | 85307 | La Faute-sur-Mer | 85 | Bassin Résidentiel | 70.5 | 52.6 | 61.1 | 5.5 | 39.2 | 38.9 | 49.1 | 73.8 | 59.8 | 20.8 | 9.4 |
258 rows × 15 columns
# Second export of the full score table
exportDatatable(dfS, './tables/scores2.html')
# Histogram of the distribution of the weighted score
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
dfS['score'].hist(bins=35, ax=ax)
fig.savefig('./figures/hist_score.png')
# Summary statistics of the columns from position 5 onward
# (the per-indicator score columns, given the column order set earlier)
d = dfS.iloc[:,5:].describe().T[['mean', 'std', '25%','50%','75%']].round(1)
# Bare expression: displays the column index when run as a notebook cell
d.columns
# French labels for the statistics; transpose back so that statistics are rows
dicR = {'mean': 'moyenne', 'std':'écart type'}
d =d.rename(columns=dicR).T
display(d)
print(d.to_markdown())
score_60_74 | score_75+ | score_15_29 | score_0dipl | score_faibldipl | score_revmed | score_foyer1 | score_ind | score_empl | score_ouv | |
---|---|---|---|---|---|---|---|---|---|---|
moyenne | 33.1 | 28.5 | 25.5 | 37.5 | 37.8 | 63.2 | 50.2 | 21.3 | 31.4 | 37.1 |
écart type | 10.6 | 13.3 | 10.7 | 12.1 | 8.4 | 13.9 | 12.3 | 12.7 | 8.9 | 12.9 |
25% | 26.9 | 20.2 | 19.2 | 28.8 | 32.6 | 55.9 | 41.7 | 14.0 | 26.4 | 28.5 |
50% | 32.0 | 26.7 | 25.5 | 36.2 | 37.8 | 65.2 | 49.3 | 19.8 | 31.5 | 36.9 |
75% | 38.0 | 35.8 | 30.8 | 44.5 | 42.5 | 72.1 | 57.6 | 26.8 | 36.3 | 44.5 |
| | score_60_74 | score_75+ | score_15_29 | score_0dipl | score_faibldipl | score_revmed | score_foyer1 | score_ind | score_empl | score_ouv | |:-----------|--------------:|------------:|--------------:|--------------:|------------------:|---------------:|---------------:|------------:|-------------:|------------:| | moyenne | 33.1 | 28.5 | 25.5 | 37.5 | 37.8 | 63.2 | 50.2 | 21.3 | 31.4 | 37.1 | | écart type | 10.6 | 13.3 | 10.7 | 12.1 | 8.4 | 13.9 | 12.3 | 12.7 | 8.9 | 12.9 | | 25% | 26.9 | 20.2 | 19.2 | 28.8 | 32.6 | 55.9 | 41.7 | 14 | 26.4 | 28.5 | | 50% | 32 | 26.7 | 25.5 | 36.2 | 37.8 | 65.2 | 49.3 | 19.8 | 31.5 | 36.9 | | 75% | 38 | 35.8 | 30.8 | 44.5 | 42.5 | 72.1 | 57.6 | 26.8 | 36.3 | 44.5 |
len(cols)
10
# Histograms of the indicator scores: cols[:5] on a first figure and
# cols[5:] on a second one, one subplot per indicator.
_score_column_overrides = {'MED17': 'score_revmed', 'p_foyer1pers': 'score_foyer1'}
_figure_specs = (
    (cols[:5], './figures/hist_scores1.png'),
    (cols[5:], './figures/hist_scores2.png'),
)
for _subset, _path in _figure_specs:
    fig, axs = plt.subplots(ncols=5, figsize=(18, 4))
    fig.subplots_adjust(left=0.042, right=0.981, hspace=0.35, wspace=0.2)
    for i, col in enumerate(_subset):
        # Score column matching the indicator: 'p_' becomes 'score_',
        # except for the two indicators that were given a short name.
        newcol = _score_column_overrides.get(col[0], col[0].replace('p_', 'score_'))
        dfS[newcol].plot(kind='hist', subplots=True, ax=axs[i], bins=20, ylim=[0, 350])
        axs[i].set_title(col[2].replace('Score ', 'Score\n'), fontsize=12)
    plt.savefig(_path)
# Per-département summary: number of communes and mean score
d = dfS.groupby(["Département"])['Commune'].count()
s = dfS.groupby(["Département"])['score'].mean()
# Hard-coded area of each département in km², indexed by département number
sup = pd.Series([6809,7172,5175,6206,6720] , index =[44, 49, 53, 72, 85], name='superficie km²')
# Number of communes per 100 km²
dens = pd.Series(d/sup*100, name='Communes/100km²')
d = pd.concat([d, s, sup, dens], axis=1).round(1)
print(d.to_markdown())
display(d)
| | Commune | score | superficie km² | Communes/100km² | |---:|----------:|--------:|-----------------:|------------------:| | 44 | 207 | 45.8 | 6809 | 3 | | 49 | 177 | 52.4 | 7172 | 2.5 | | 53 | 242 | 60.1 | 5175 | 4.7 | | 72 | 354 | 58.5 | 6206 | 5.7 | | 85 | 258 | 60.5 | 6720 | 3.8 |
Commune | score | superficie km² | Communes/100km² | |
---|---|---|---|---|
44 | 207 | 45.8 | 6809 | 3.0 |
49 | 177 | 52.4 | 7172 | 2.5 |
53 | 242 | 60.1 | 5175 | 4.7 |
72 | 354 | 58.5 | 6206 | 5.7 |
85 | 258 | 60.5 | 6720 | 3.8 |
# Analysis by département: mean of the global score and of every indicator
# score, rounded to one decimal. The columns are selected with a list:
# selecting with a tuple of keys after groupby is deprecated and is rejected
# by recent pandas versions.
d = dfS.groupby(["Département"])[['score', 'score_60_74', 'score_75+', 'score_15_29', 'score_0dipl', 'score_faibldipl', 'score_revmed', 'score_foyer1', 'score_ind', 'score_empl', 'score_ouv']].mean().round(1)
display(d)
print(d.to_markdown())
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:2: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.
score | score_60_74 | score_75+ | score_15_29 | score_0dipl | score_faibldipl | score_revmed | score_foyer1 | score_ind | score_empl | score_ouv | |
---|---|---|---|---|---|---|---|---|---|---|---|
Département | |||||||||||
44 | 45.8 | 31.2 | 25.1 | 26.1 | 29.1 | 36.6 | 55.4 | 50.3 | 21.1 | 33.6 | 32.8 |
49 | 52.4 | 33.3 | 28.6 | 24.9 | 38.1 | 32.4 | 62.1 | 48.0 | 20.7 | 31.4 | 35.1 |
53 | 60.1 | 31.0 | 31.1 | 25.6 | 42.2 | 35.6 | 69.8 | 50.7 | 19.3 | 28.7 | 39.1 |
72 | 58.5 | 33.7 | 27.4 | 25.3 | 39.1 | 40.9 | 62.4 | 49.7 | 22.1 | 31.6 | 38.3 |
85 | 60.5 | 35.6 | 30.6 | 25.4 | 37.1 | 40.0 | 65.3 | 51.9 | 22.5 | 31.7 | 38.2 |
| Département | score | score_60_74 | score_75+ | score_15_29 | score_0dipl | score_faibldipl | score_revmed | score_foyer1 | score_ind | score_empl | score_ouv | |--------------:|--------:|--------------:|------------:|--------------:|--------------:|------------------:|---------------:|---------------:|------------:|-------------:|------------:| | 44 | 45.8 | 31.2 | 25.1 | 26.1 | 29.1 | 36.6 | 55.4 | 50.3 | 21.1 | 33.6 | 32.8 | | 49 | 52.4 | 33.3 | 28.6 | 24.9 | 38.1 | 32.4 | 62.1 | 48 | 20.7 | 31.4 | 35.1 | | 53 | 60.1 | 31 | 31.1 | 25.6 | 42.2 | 35.6 | 69.8 | 50.7 | 19.3 | 28.7 | 39.1 | | 72 | 58.5 | 33.7 | 27.4 | 25.3 | 39.1 | 40.9 | 62.4 | 49.7 | 22.1 | 31.6 | 38.3 | | 85 | 60.5 | 35.6 | 30.6 | 25.4 | 37.1 | 40 | 65.3 | 51.9 | 22.5 | 31.7 | 38.2 |
# Analysis by demographic environment: mean of the global score and of every
# indicator score, rounded to one decimal. The columns are selected with a
# list: selecting with a tuple of keys after groupby is deprecated and is
# rejected by recent pandas versions.
d = dfS.groupby(["Environnement Démographique"])[['score', 'score_60_74', 'score_75+', 'score_15_29', 'score_0dipl', 'score_faibldipl', 'score_revmed', 'score_foyer1', 'score_ind', 'score_empl', 'score_ouv']].mean().round(1)
display(d)
print(d.to_markdown())
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:1: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead. """Entry point for launching an IPython kernel.
score | score_60_74 | score_75+ | score_15_29 | score_0dipl | score_faibldipl | score_revmed | score_foyer1 | score_ind | score_empl | score_ouv | |
---|---|---|---|---|---|---|---|---|---|---|---|
Environnement Démographique | |||||||||||
Bassin Industriel | 60.9 | 33.2 | 28.2 | 25.6 | 40.4 | 38.8 | 66.4 | 51.3 | 20.2 | 29.7 | 41.3 |
Bassin Résidentiel | 57.1 | 35.7 | 30.9 | 23.4 | 36.4 | 40.0 | 63.6 | 52.4 | 23.5 | 32.4 | 33.1 |
Bassin Urbain | 43.6 | 33.7 | 25.0 | 25.6 | 31.1 | 33.8 | 51.9 | 47.2 | 20.6 | 33.9 | 30.4 |
Bassin diversifié | 58.2 | 28.9 | 26.7 | 28.3 | 37.4 | 38.8 | 66.7 | 47.7 | 21.2 | 31.2 | 43.5 |
Bassins Agroalimentaire | 61.9 | 31.2 | 32.9 | 25.8 | 43.1 | 36.0 | 69.8 | 50.8 | 20.8 | 29.4 | 38.4 |
| Environnement Démographique | score | score_60_74 | score_75+ | score_15_29 | score_0dipl | score_faibldipl | score_revmed | score_foyer1 | score_ind | score_empl | score_ouv | |:------------------------------|--------:|--------------:|------------:|--------------:|--------------:|------------------:|---------------:|---------------:|------------:|-------------:|------------:| | Bassin Industriel | 60.9 | 33.2 | 28.2 | 25.6 | 40.4 | 38.8 | 66.4 | 51.3 | 20.2 | 29.7 | 41.3 | | Bassin Résidentiel | 57.1 | 35.7 | 30.9 | 23.4 | 36.4 | 40 | 63.6 | 52.4 | 23.5 | 32.4 | 33.1 | | Bassin Urbain | 43.6 | 33.7 | 25 | 25.6 | 31.1 | 33.8 | 51.9 | 47.2 | 20.6 | 33.9 | 30.4 | | Bassin diversifié | 58.2 | 28.9 | 26.7 | 28.3 | 37.4 | 38.8 | 66.7 | 47.7 | 21.2 | 31.2 | 43.5 | | Bassins Agroalimentaire | 61.9 | 31.2 | 32.9 | 25.8 | 43.1 | 36 | 69.8 | 50.8 | 20.8 | 29.4 | 38.4 |
# Analysis by demographic environment and département: mean of the global
# score and of every indicator score, rounded to one decimal. The columns are
# selected with a list: selecting with a tuple of keys after groupby is
# deprecated and is rejected by recent pandas versions.
d = dfS.groupby(["Environnement Démographique", "Département"])[['score', 'score_60_74', 'score_75+', 'score_15_29', 'score_0dipl', 'score_faibldipl', 'score_revmed', 'score_foyer1', 'score_ind', 'score_empl', 'score_ouv']].mean().round(1)
display(d)
print(d.to_markdown())
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:1: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead. """Entry point for launching an IPython kernel.
score | score_60_74 | score_75+ | score_15_29 | score_0dipl | score_faibldipl | score_revmed | score_foyer1 | score_ind | score_empl | score_ouv | ||
---|---|---|---|---|---|---|---|---|---|---|---|---|
Environnement Démographique | Département | |||||||||||
Bassin Industriel | 44 | 48.7 | 29.2 | 24.1 | 26.8 | 32.2 | 37.1 | 60.0 | 47.0 | 21.1 | 34.0 | 37.1 |
49 | 58.7 | 33.4 | 28.2 | 25.0 | 42.2 | 33.0 | 68.2 | 49.2 | 18.7 | 30.2 | 40.9 | |
53 | 63.8 | 33.3 | 32.4 | 24.2 | 44.7 | 36.9 | 71.8 | 53.3 | 18.2 | 26.6 | 39.5 | |
72 | 67.0 | 36.5 | 28.1 | 24.5 | 43.7 | 43.0 | 66.5 | 53.9 | 22.3 | 29.3 | 42.1 | |
85 | 60.2 | 31.1 | 27.2 | 28.6 | 37.0 | 38.5 | 65.2 | 49.8 | 19.1 | 29.7 | 46.3 | |
Bassin Résidentiel | 44 | 48.6 | 33.0 | 25.8 | 25.2 | 28.7 | 39.5 | 59.0 | 51.3 | 21.8 | 34.2 | 32.9 |
49 | 52.1 | 31.8 | 26.7 | 26.0 | 36.1 | 32.6 | 63.3 | 49.3 | 24.0 | 29.9 | 36.6 | |
53 | 76.6 | 34.8 | 46.5 | 18.2 | 49.2 | 37.2 | 75.8 | 67.0 | 18.7 | 29.9 | 36.2 | |
72 | 56.3 | 32.2 | 29.0 | 24.6 | 38.1 | 40.4 | 63.7 | 49.3 | 22.3 | 32.8 | 35.1 | |
85 | 63.3 | 41.4 | 35.8 | 20.9 | 39.3 | 41.1 | 66.1 | 55.5 | 25.9 | 31.4 | 30.7 | |
Bassin Urbain | 44 | 36.1 | 32.6 | 25.8 | 26.1 | 24.2 | 30.9 | 41.0 | 53.6 | 21.0 | 32.0 | 24.4 |
49 | 47.9 | 35.5 | 27.6 | 24.4 | 34.9 | 31.9 | 55.9 | 48.0 | 22.7 | 32.4 | 30.8 | |
53 | 44.7 | 28.8 | 21.8 | 27.1 | 32.3 | 33.8 | 58.1 | 43.6 | 16.4 | 36.0 | 37.7 | |
72 | 41.7 | 36.1 | 23.9 | 24.3 | 32.4 | 36.9 | 48.9 | 43.0 | 19.7 | 34.6 | 27.7 | |
85 | 47.9 | 33.0 | 23.9 | 27.8 | 29.1 | 37.4 | 57.7 | 48.1 | 22.2 | 36.5 | 34.3 | |
Bassin diversifié | 44 | 49.5 | 25.0 | 22.1 | 28.0 | 31.1 | 40.0 | 64.1 | 49.3 | 17.5 | 35.4 | 39.5 |
49 | 56.0 | 30.2 | 30.2 | 25.9 | 41.0 | 31.9 | 66.7 | 47.5 | 18.4 | 32.3 | 38.5 | |
53 | 56.6 | 26.8 | 25.5 | 30.6 | 36.2 | 37.6 | 69.1 | 46.5 | 21.3 | 29.5 | 44.3 | |
72 | 59.8 | 29.2 | 26.1 | 28.3 | 37.3 | 42.0 | 63.5 | 46.9 | 22.8 | 30.0 | 46.5 | |
85 | 63.5 | 32.3 | 28.2 | 27.1 | 38.6 | 41.6 | 69.8 | 50.7 | 22.5 | 33.6 | 43.3 | |
Bassins Agroalimentaire | 44 | 56.6 | 30.7 | 28.9 | 23.6 | 40.2 | 35.4 | 62.2 | 51.1 | 18.7 | 29.9 | 40.4 |
49 | 53.2 | 31.4 | 30.6 | 24.4 | 39.4 | 33.5 | 64.9 | 46.8 | 19.0 | 30.2 | 34.8 | |
53 | 66.2 | 31.8 | 38.1 | 23.6 | 49.2 | 33.1 | 74.9 | 53.3 | 21.5 | 25.9 | 35.8 | |
72 | 64.0 | 30.5 | 28.5 | 28.8 | 40.7 | 40.9 | 68.8 | 52.4 | 24.1 | 34.7 | 40.8 | |
85 | 61.3 | 30.3 | 28.2 | 30.4 | 36.1 | 41.0 | 65.9 | 48.3 | 18.3 | 31.4 | 46.2 |
| | score | score_60_74 | score_75+ | score_15_29 | score_0dipl | score_faibldipl | score_revmed | score_foyer1 | score_ind | score_empl | score_ouv | |:--------------------------------|--------:|--------------:|------------:|--------------:|--------------:|------------------:|---------------:|---------------:|------------:|-------------:|------------:| | ('Bassin Industriel', 44) | 48.7 | 29.2 | 24.1 | 26.8 | 32.2 | 37.1 | 60 | 47 | 21.1 | 34 | 37.1 | | ('Bassin Industriel', 49) | 58.7 | 33.4 | 28.2 | 25 | 42.2 | 33 | 68.2 | 49.2 | 18.7 | 30.2 | 40.9 | | ('Bassin Industriel', 53) | 63.8 | 33.3 | 32.4 | 24.2 | 44.7 | 36.9 | 71.8 | 53.3 | 18.2 | 26.6 | 39.5 | | ('Bassin Industriel', 72) | 67 | 36.5 | 28.1 | 24.5 | 43.7 | 43 | 66.5 | 53.9 | 22.3 | 29.3 | 42.1 | | ('Bassin Industriel', 85) | 60.2 | 31.1 | 27.2 | 28.6 | 37 | 38.5 | 65.2 | 49.8 | 19.1 | 29.7 | 46.3 | | ('Bassin Résidentiel', 44) | 48.6 | 33 | 25.8 | 25.2 | 28.7 | 39.5 | 59 | 51.3 | 21.8 | 34.2 | 32.9 | | ('Bassin Résidentiel', 49) | 52.1 | 31.8 | 26.7 | 26 | 36.1 | 32.6 | 63.3 | 49.3 | 24 | 29.9 | 36.6 | | ('Bassin Résidentiel', 53) | 76.6 | 34.8 | 46.5 | 18.2 | 49.2 | 37.2 | 75.8 | 67 | 18.7 | 29.9 | 36.2 | | ('Bassin Résidentiel', 72) | 56.3 | 32.2 | 29 | 24.6 | 38.1 | 40.4 | 63.7 | 49.3 | 22.3 | 32.8 | 35.1 | | ('Bassin Résidentiel', 85) | 63.3 | 41.4 | 35.8 | 20.9 | 39.3 | 41.1 | 66.1 | 55.5 | 25.9 | 31.4 | 30.7 | | ('Bassin Urbain', 44) | 36.1 | 32.6 | 25.8 | 26.1 | 24.2 | 30.9 | 41 | 53.6 | 21 | 32 | 24.4 | | ('Bassin Urbain', 49) | 47.9 | 35.5 | 27.6 | 24.4 | 34.9 | 31.9 | 55.9 | 48 | 22.7 | 32.4 | 30.8 | | ('Bassin Urbain', 53) | 44.7 | 28.8 | 21.8 | 27.1 | 32.3 | 33.8 | 58.1 | 43.6 | 16.4 | 36 | 37.7 | | ('Bassin Urbain', 72) | 41.7 | 36.1 | 23.9 | 24.3 | 32.4 | 36.9 | 48.9 | 43 | 19.7 | 34.6 | 27.7 | | ('Bassin Urbain', 85) | 47.9 | 33 | 23.9 | 27.8 | 29.1 | 37.4 | 57.7 | 48.1 | 22.2 | 36.5 | 34.3 | | ('Bassin diversifié', 44) | 49.5 | 25 | 22.1 | 28 | 31.1 | 40 | 64.1 | 49.3 | 17.5 | 35.4 | 
39.5 | | ('Bassin diversifié', 49) | 56 | 30.2 | 30.2 | 25.9 | 41 | 31.9 | 66.7 | 47.5 | 18.4 | 32.3 | 38.5 | | ('Bassin diversifié', 53) | 56.6 | 26.8 | 25.5 | 30.6 | 36.2 | 37.6 | 69.1 | 46.5 | 21.3 | 29.5 | 44.3 | | ('Bassin diversifié', 72) | 59.8 | 29.2 | 26.1 | 28.3 | 37.3 | 42 | 63.5 | 46.9 | 22.8 | 30 | 46.5 | | ('Bassin diversifié', 85) | 63.5 | 32.3 | 28.2 | 27.1 | 38.6 | 41.6 | 69.8 | 50.7 | 22.5 | 33.6 | 43.3 | | ('Bassins Agroalimentaire', 44) | 56.6 | 30.7 | 28.9 | 23.6 | 40.2 | 35.4 | 62.2 | 51.1 | 18.7 | 29.9 | 40.4 | | ('Bassins Agroalimentaire', 49) | 53.2 | 31.4 | 30.6 | 24.4 | 39.4 | 33.5 | 64.9 | 46.8 | 19 | 30.2 | 34.8 | | ('Bassins Agroalimentaire', 53) | 66.2 | 31.8 | 38.1 | 23.6 | 49.2 | 33.1 | 74.9 | 53.3 | 21.5 | 25.9 | 35.8 | | ('Bassins Agroalimentaire', 72) | 64 | 30.5 | 28.5 | 28.8 | 40.7 | 40.9 | 68.8 | 52.4 | 24.1 | 34.7 | 40.8 | | ('Bassins Agroalimentaire', 85) | 61.3 | 30.3 | 28.2 | 30.4 | 36.1 | 41 | 65.9 | 48.3 | 18.3 | 31.4 | 46.2 |
Réalise la cartographie d'un indicateur ou d'une donnée statistique. Permet d'avoir un regard critique sur ces données.
import folium
import geopandas as gpd
def choroplethInsee(m, variable):
    """Add a commune choropleth of ``dfS[variable]`` with hover tooltips to map ``m``.

    The commune outlines are read from the GeoJSON file
    '../data/Contourgeocode/a_com2020_PDL_geojson.json' and joined to the
    module-level DataFrame ``dfS`` through the commune code
    (``codgeo`` on the outlines, ``CODGEO`` in ``dfS``).

    m: folium map the layers are added to.
    variable: name of the ``dfS`` column to display.

    Returns the same map ``m``; a layer control is added on every call.
    """
    geojson_path = '../data/Contourgeocode/a_com2020_PDL_geojson.json'
    communes = gpd.read_file(geojson_path)
    # Cast the commune code to int before it is joined with dfS['CODGEO']
    communes['codgeo'] = communes['codgeo'].astype('int')

    # NOTE(review): the legend text is hard-coded and does not depend on
    # `variable` — confirm this is intended.
    choropleth = folium.Choropleth(
        geo_data=communes,
        name='score précarité numérique',
        data=dfS,
        columns=['CODGEO', variable],
        key_on='feature.properties.codgeo',
        fill_color='RdYlBu_r',
        fill_opacity=0.8,
        line_opacity=0.2,
        legend_name='DYN SetC',
        smooth_factor=0,
    )
    choropleth.add_to(m)

    # Outlines carrying the displayed value, used only for the hover layer
    values = dfS[['CODGEO', variable]]
    communes_with_values = communes.merge(values, left_on='codgeo', right_on='CODGEO')

    def style_function(feature):
        # Nearly transparent overlay so the choropleth colours stay visible
        return {'fillColor': '#ffffff', 'color':'#000000', 'fillOpacity': 0.1, 'weight': 0.1}

    def highlight_function(feature):
        # Darken the commune under the mouse pointer
        return {'fillColor': '#000000', 'color':'#000000', 'fillOpacity': 0.50, 'weight': 0.1}

    tooltip = folium.features.GeoJsonTooltip(
        fields=['libgeo', variable],
        aliases=['',variable + ":"],
        style=("background-color: white; color: #333333; font-family: arial; font-size: 12px; padding: 10px;")
    )
    hover_layer = folium.features.GeoJson(
        communes_with_values,
        style_function=style_function,
        control=False,
        highlight_function=highlight_function,
        tooltip=tooltip,
    )
    m.add_child(hover_layer)
    m.keep_in_front(hover_layer)
    folium.LayerControl().add_to(m)
    return m
# Base map with a hard-coded centre and zoom level
m = folium.Map(location=[47.4791129516, -0.814151724722], zoom_start=8, tiles='OpenStreetMap', max_bounds=True)
# Copy the raw 'MED17' column into dfS (aligned on the row index) so it can be mapped
dfS['MED17'] = df['MED17']
m = choroplethInsee(m, 'MED17')
# Bare expression: displays the map when run as a notebook cell
m