import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
%matplotlib inline
# Use the 'ggplot' matplotlib style for every figure drawn below.
plt.style.use('ggplot')

# Read the aggregated data file into the module-level DataFrame `df`,
# which the helper functions and the rest of the script work on.
df = pd.read_csv("../data/data_aggregate_PDL.csv")


def computePercentage(cols, colref):
    """Return the row-wise sum of several columns divided by a reference column.

    The function reads the module-level DataFrame ``df``. Despite its name the
    result is a plain ratio: it is not multiplied by 100.

    Args:
        cols: Labels of the columns of ``df`` that are summed for each row.
        colref: Label of the column of ``df`` used as the denominator.

    Returns:
        The per-row quotient computed by pandas.
    """
    numerator = df[cols].sum(axis=1)
    denominator = df[colref]
    return numerator / denominator


#Traitement des colonnes
def findCols(df, s):
    """Return the column labels of ``df`` that contain the substring ``s``.

    Args:
        df: DataFrame whose column labels are searched.
        s: Substring to look for in each column label.

    Returns:
        A list of the matching labels, in the order they appear in ``df``.
    """
    return [name for name in df.columns if s in name]
# Columns whose name contains 'NIV_'. `cols` is reassigned by the next lookup
# below before this value is read.
cols = findCols(df, 'NIV_')


def findEtudeNCols(df, etude, s):
    """Return the columns of one study whose label contains ``s``.

    A column belongs to the study when the part of its label located before
    the first underscore is equal to ``etude``.

    Args:
        df: DataFrame whose column labels are searched.
        etude: Study prefix the label must start with (text before the first "_").
        s: Substring that must also appear in the label.

    Returns:
        A list of the matching labels, in the order they appear in ``df``.
    """
    return [
        name
        for name in df.columns
        if name.partition("_")[0] == etude and s in name
    ]
# Columns of the 'POP2' study whose name contains 'NA38AZ'. `cols` is
# reassigned by the next lookup below before this value is read.
cols = findEtudeNCols(df, 'POP2', 'NA38AZ')


# Share of unemployed people in the population
df['p_chom'] = (df['P17_CHOM1564']/df['P17_POP'])

# Share of people aged 65 and over (both SEXE1 and SEXE2 columns are summed)
cols = findCols(df, 'POP5_SEXE1_AGEQ65065') + findCols(df, 'POP5_SEXE2_AGEQ65065')
df['p_65+'] = computePercentage(cols, 'P17_POP')

# Share of 15-29 year olds (the 015, 020 and 025 age columns are summed)
cols = findCols(df, 'ACT2A_AGEQ65015_SEXE') + findCols(df, 'ACT2A_AGEQ65020_SEXE') + findCols(df, 'ACT2A_AGEQ65025_SEXE')
df['p_15_29'] = computePercentage(cols, 'P17_POP')


# Number of households per commune (sum of every 'MEN1_NPERC' column)
cols = findCols(df, 'MEN1_NPERC')
df['nb_menages'] = df[cols].sum(axis = 1)
# Share of one-person households
# NOTE(review): 'MEN1_NPERC1' is a substring match, so any column whose name
# merely starts the same way would be included too — confirm the column names.
cols = findCols(df, 'MEN1_NPERC1') 
df['p_foyer1pers'] = computePercentage(cols, 'nb_menages')

# Number of families per commune
# NOTE(review): this sums the same 'MEN1_NPERC' columns as nb_menages above, so
# nb_familles is equal to nb_menages — confirm which columns hold family counts.
cols = findCols(df, 'MEN1_NPERC')
df['nb_familles'] = df[cols].sum(axis = 1)
# Share of single-parent families
# NOTE(review): same 'MEN1_NPERC1' columns as p_foyer1pers, so p_fammono ends up
# identical to p_foyer1pers; this looks like a copy-paste leftover — TODO confirm.
cols = findCols(df, 'MEN1_NPERC1') 
df['p_fammono'] =  computePercentage(cols, 'nb_familles')


# Number of people in the sample (sum of every 'FOR2_AGEQ650' column)
cols = findCols(df, 'FOR2_AGEQ650')
df['nb_catdipl'] =  df[cols].sum(axis = 1)
# Share of people with no or low qualifications (DIPL_19A, DIPL_19B, DIPL_19C)
cols = findCols(df, 'DIPL_19A') + findCols(df, 'DIPL_19B') + findCols(df, 'DIPL_19C')
filters = ['AGEQ65015'] # excludes people under 20 (columns whose name contains this tag)
cols = [s for s in cols if not any(xs in s for xs in filters)]
df['p_faible_dipl'] = computePercentage(cols, 'nb_catdipl')


# Share of people aged 75 and over; MEN3 is the sum of every 'AGEQ80_14' column
# and is used as the denominator for the two age indicators below.
df['MEN3'] = df[findCols(df, 'AGEQ80_14')].sum(axis=1)
cols = findCols(df, 'AGEQ80_14075') + findCols(df, 'AGEQ80_14080')
df['p_75+'] = computePercentage(cols, 'MEN3')

# Share of people aged 60-74 (the 060, 065 and 070 age columns are summed;
# the original comment said "65-74")
cols = findCols(df, 'AGEQ80_14060') + findCols(df, 'AGEQ80_14065') + findCols(df, 'AGEQ80_14070')
df['p_60_74'] = computePercentage(cols, 'MEN3')

# Share of people over 20 with no qualification
# NOTE(review): the [2:] slice drops the first two matching columns by position,
# so it relies on the column order of the CSV — TODO confirm they are the under-20 ones.
cols = findCols(df, 'DIPL_19')[2:] # to keep only the over-20s
df['DIPL_20+'] = df[cols].sum(axis=1)
cols = findCols(df, 'DIPL_19A')
df['p_0dipl'] = computePercentage(cols, 'DIPL_20+')
# Bare expression: displays the column in a notebook, no effect otherwise.
df['p_0dipl']

# Share of people over 20 with a low qualification
cols = findCols(df, 'DIPL_19B') + findCols(df, 'DIPL_19C')
df['p_faibldipl'] = computePercentage(cols, 'DIPL_20+')


# Socio-professional categories, computed on the 'POP6' study only.
# colsRef (every POP6 column containing 'CS1_8') is the common denominator.
colsRef = findEtudeNCols(df, 'POP6', 'CS1_8')
# p_ouv: share of the 'CS1_86' columns, rounded to 4 decimals
cols = findEtudeNCols(df, 'POP6', 'CS1_86')
df['p_ouv'] = round(df[cols].sum(axis=1)/df[colsRef].sum(axis=1), 4)


# p_empl: share of the 'CS1_85' columns
cols = findEtudeNCols(df, 'POP6', 'CS1_85')
df['p_empl'] = round(df[cols].sum(axis=1)/df[colsRef].sum(axis=1), 4)

# p_ind: share of the 'CS1_82' columns
cols = findEtudeNCols(df, 'POP6', 'CS1_82')
df['p_ind'] = round(df[cols].sum(axis=1)/df[colsRef].sum(axis=1), 4)



# CAF data
# Share of people (PRES_TR50PFRB / PRES_TR100PFRB relative to the population)
df['p_TR50'] = df['PRES_TR50PFRB'] /df['P17_POP']
df['p_TR100'] = df['PRES_TR100PFRB'] /df['P17_POP']


# Share of manual workers / farmers
cols = findCols(df, 'POP6_CS1_8')
df['POP6CS1_20+'] = df[cols].sum(axis=1)

# NOTE(review): unlike the denominator above, these lookups are not restricted
# to the POP6 study, so same-named columns of other studies would be summed too
# — TODO confirm against the CSV header.
cols = findCols(df, 'CS1_81') + findCols(df, 'CS1_86')
df['p_ouvragri'] = computePercentage(cols, 'POP6CS1_20+')

# Share of self-employed people
cols = findCols(df, 'CS1_82')
df['p_indé'] = computePercentage(cols, 'POP6CS1_20+')


# Data correction: hard-coded override of p_ind for the row labelled 735.
# NOTE(review): tied to the row order of the current CSV — verify after any data refresh.
df.at[735,'p_ind'] = 0.15


# Indicator specifications used to build the score. Each entry is
# [column name in df, orientation, display label, weight]:
#   - orientation '+' makes the score grow with the value, '-' makes it grow
#     as the value decreases (see computeScoreWithNA);
#   - the label is used as the title of the per-indicator histograms;
#   - the weight is the coefficient of the indicator in the weighted global score.
cols = [['p_75+', '+', 'Score +75 ans', 3],
['p_60_74', '+', 'Score 60-74 ans', 2], 
['p_15_29', '+', 'Score 15-29ans', 2], 
['p_0dipl', '+', 'Score aucun diplome', 3], 
['p_faibldipl', '+', 'Score faible diplôme', 2], 
['MED17', '-', 'Score revenus\nmedian faible', 1], 
['p_foyer1pers', '+', 'Score familles\nmonoparentales\n& foyer 1 pers', 2], 
['p_ind', '+', 'Score d\'independants', 1], 
['p_empl', '+', 'Score d\'employés', 1],
['p_ouv', '+', 'Score d\'ouvriers', 3]]


def computeScoreWithNA(df, cols):
    """Build a DataFrame of mean-relative scores, one column per indicator.

    For every indicator the score is 100 where the value equals the column
    mean. With orientation '+' the score grows with the value; with '-' it
    grows as the value decreases. Missing values stay missing in the result
    and are skipped when the column mean is computed (pandas default).

    Args:
        df: DataFrame holding a ``CODGEO`` column and one column per indicator.
        cols: Iterable of indicator specifications. Item ``[0]`` is the column
            name in ``df`` and item ``[1]`` the orientation ('+' or '-'); any
            further items are ignored here.

    Returns:
        A new DataFrame with the ``CODGEO`` column followed by one
        ``"<variable>_score"`` column per indicator, rounded to one decimal.

    Raises:
        ValueError: If an orientation is neither '+' nor '-'. (The previous
            implementation failed later with an unrelated ``KeyError``.)
    """
    scores = pd.DataFrame(df.CODGEO)
    for spec in cols:
        variable, orientation = spec[0], spec[1]
        vals = df[variable]
        mean = vals.mean()
        if orientation == '-':
            relative = (mean - vals) / mean
        elif orientation == '+':
            relative = (vals - mean) / mean
        else:
            raise ValueError(
                "orientation for %r must be '+' or '-', got %r" % (variable, orientation)
            )
        # Shift so that the mean maps to 100, then express as a percentage.
        scores[variable + '_score'] = ((relative + 1) * 100).round(1)
    return scores

# Compute the per-indicator scores for every row of df.
dfS = computeScoreWithNA(df, cols)
# Handle missing values of the median-income indicator: missing scores are
# replaced by the median of the available scores.
dfS['MED17_score'] = dfS['MED17_score'].fillna(dfS['MED17_score'].median())


#Normalisation des colonnes de scores sur 0-100
def norm(s):
    """Min-max normalise a Series onto the 0-100 range, rounded to one decimal.

    Args:
        s: Numeric pandas Series.

    Returns:
        A Series where the minimum of ``s`` maps to 0 and the maximum to 100.
    """
    lowest = s.min()
    span = s.max() - lowest
    return (s - lowest).mul(100).div(span).round(1)
# Add a normalised "<name>_norm" column for every score column of dfS.
raw_score_cols = [name for name in dfS.columns if '_score' in name]
for name in raw_score_cols:
    dfS[name + '_norm'] = norm(dfS[name])


# Keep only the normalised columns: drop every raw "*_score" column.
dfS = dfS.drop(columns=[name for name in dfS.columns if name.endswith('_score')])


# Weighted precariousness score on a 0-100 scale: weighted mean of the
# normalised indicator scores, using the weight stored in each spec of `cols`.
weighted_cols = [(spec[0] + '_score_norm', int(spec[3])) for spec in cols]
coef_sum = sum(weight for _, weight in weighted_cols)

dfS['score'] = 0
for column, coef in weighted_cols:
    dfS['score'] += dfS[column] * coef
dfS['score'] = round(dfS['score']/coef_sum, 1)
# Hard-coded correction: the row labelled 820 receives the mean score.
dfS.at[820,'score'] = dfS['score'].mean()
# Rescale the global score onto 0-100 (same formula as the per-column scores).
dfS['score'] = norm(dfS['score'])


# Add descriptive data about each commune
dfS['CODGEO'] = dfS['CODGEO'].astype(int)
# NOTE(review): the assignments from df align on the row index, so they assume
# dfS still shares df's index — verify if rows are ever filtered or reordered.
dfS['Commune'] = df['LIBGEO_x']
dfS['Département'] = df['DEP_x'].astype(int)
#dfS['Dynamique Démographique'] = df['Dynamique Démographique BV']
# Strip the " en croissance démographique" suffix from the category labels.
dfS['Environnement Démographique'] = df['Environnement Démographique'].str.replace(" en croissance démographique", "")

# Reorder the columns: identifiers first, then the global score, then the
# per-indicator scores.
dfS = dfS[['CODGEO', 'Commune', 'Département',
       'Environnement Démographique', 'score', 'p_60_74_score_norm', 'p_75+_score_norm',
       'p_15_29_score_norm', 'p_0dipl_score_norm', 'p_faibldipl_score_norm',
       'MED17_score_norm', 'p_foyer1pers_score_norm', 'p_ind_score_norm', 'p_empl_score_norm','p_ouv_score_norm'
       ]]

# Rename the "<name>_score_norm" columns to "score_<short name>"
for col in dfS.columns:
    if "_score_norm" in col:
        newcolname = col.replace("_score_norm", "")
        # Drops every "p_" occurrence, not only a leading one.
        newcolname = newcolname.replace("p_", "")
        newcolname = newcolname.replace("MED17", "revmed")
        newcolname = newcolname.replace('foyer1pers', 'foyer1')
        newcolname = 'score_' + newcolname
        dfS = dfS.rename(columns={col: newcolname})


dfS.columns

Index(['CODGEO', 'Commune', 'Département', 'Environnement Démographique',
       'score', 'score_60_74', 'score_75+', 'score_15_29', 'score_0dipl',
       'score_faibldipl', 'score_revmed', 'score_foyer1', 'score_ind',
       'score_empl', 'score_ouv'],
      dtype='object')


def exportDatatable(df, filename):
    """Export a DataFrame as an HTML page rendered with jQuery DataTables.

    The table produced by ``DataFrame.to_html`` is given the id ``example``
    and the class ``display``, and is preceded by the script and stylesheet
    tags that load jQuery and DataTables from their CDNs (50 rows per page,
    French interface strings).

    Cell values are written as raw HTML (``escape=False``), so ``df`` must not
    contain untrusted text.

    Args:
        df: DataFrame to export; its index is not written.
        filename: Path of the HTML file to create or overwrite.

    Returns:
        None. The page is written to ``filename`` as UTF-8.
    """
    table_html = df.to_html(index=False, escape=False)
    table_html = (
        table_html
        .replace('<th>', '<th class = "th-sm" style="text-align: left">')
        .replace('border="1"', 'id="example"')
        .replace('class="dataframe"', 'class="display"')
    )
    # The charset declaration matches the encoding used to write the file, so
    # that accented labels render correctly whatever the browser default is.
    header = '''
        <meta charset="utf-8">
        <script type="text/javascript" src="https://code.jquery.com/jquery-3.3.1.js"></script>
        <script type="text/javascript" src="https://cdn.datatables.net/1.10.20/js/jquery.dataTables.min.js"></script>
        <link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/1.10.20/css/jquery.dataTables.min.css"/>
         <script>
        $(document).ready(function() {
            $('#example').DataTable( {
                "pageLength": 50,
                "language": {"url": "https://cdn.datatables.net/plug-ins/1.10.24/i18n/French.json"}
            } );
        } );</script>
        '''
    # The context manager closes the file even if the write fails; the explicit
    # encoding avoids depending on the platform default.
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(header + table_html)
# Full table: interactive HTML page and tab-separated CSV.
exportDatatable(dfS, './tables/scores.html')
dfS.to_csv('./scores.csv', sep='\t', index=False)


# One HTML table per department; the department column is dropped from each
# of them since it holds a single value there.
for dep in (44, 49, 53, 72, 85):
    in_dep = dfS['Département'] == dep
    dfD = dfS.loc[in_dep].drop(columns='Département')
    exportDatatable(dfD, './tables/scores_' + str(dep) + '.html')


# Second copy of the full table.
exportDatatable(dfS, './tables/scores2.html')


# Histogram of the distribution of the weighted score
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
dfS['score'].hist(bins=35, ax=ax)
fig.savefig('./figures/hist_score.png')


# Summary statistics (mean, standard deviation, quartiles) of the columns from
# position 5 onward, i.e. the per-indicator score columns given the column
# order set when dfS was reorganised.
d = dfS.iloc[:,5:].describe().T[['mean', 'std', '25%','50%','75%']].round(1)
# Bare expression: displays the column labels in a notebook, no effect otherwise.
d.columns
# Rename the statistics to their French labels, then put statistics back in rows.
dicR = {'mean': 'moyenne', 'std':'écart type'}
d =d.rename(columns=dicR).T
display(d)
print(d.to_markdown())

|            |   score_60_74 |   score_75+ |   score_15_29 |   score_0dipl |   score_faibldipl |   score_revmed |   score_foyer1 |   score_ind |   score_empl |   score_ouv |
|:-----------|--------------:|------------:|--------------:|--------------:|------------------:|---------------:|---------------:|------------:|-------------:|------------:|
| moyenne    |          33.1 |        28.5 |          25.5 |          37.5 |              37.8 |           63.2 |           50.2 |        21.3 |         31.4 |        37.1 |
| écart type |          10.6 |        13.3 |          10.7 |          12.1 |               8.4 |           13.9 |           12.3 |        12.7 |          8.9 |        12.9 |
| 25%        |          26.9 |        20.2 |          19.2 |          28.8 |              32.6 |           55.9 |           41.7 |        14   |         26.4 |        28.5 |
| 50%        |          32   |        26.7 |          25.5 |          36.2 |              37.8 |           65.2 |           49.3 |        19.8 |         31.5 |        36.9 |
| 75%        |          38   |        35.8 |          30.8 |          44.5 |              42.5 |           72.1 |           57.6 |        26.8 |         36.3 |        44.5 |


len(cols)

10


def _score_column(variable):
    """Return the name of the dfS column holding the score of ``variable``."""
    renamed = {'MED17': 'score_revmed', 'p_foyer1pers': 'score_foyer1'}
    return renamed.get(variable, variable.replace('p_', 'score_'))


def _plot_score_histograms(specs, path):
    """Draw one histogram per indicator spec on a row of five axes and save it."""
    fig, axs = plt.subplots(ncols=5, figsize=(18, 4))
    fig.subplots_adjust(left=0.042,right=0.981,hspace=0.35, wspace=0.2)
    for position, spec in enumerate(specs):
        ax = axs[position]
        dfS[_score_column(spec[0])].plot(kind='hist', subplots=True, ax=ax, bins=20, ylim =[0,350])
        # Break the title after "Score" so that long labels fit above the axes.
        ax.set_title(spec[2].replace('Score ','Score\n'), fontsize=12)
    plt.savefig(path)


# The ten indicators are drawn on two figures of five histograms each.
_plot_score_histograms(cols[:5], './figures/hist_scores1.png')

_plot_score_histograms(cols[5:], './figures/hist_scores2.png')


# Per-department summary: number of communes, mean score, area and commune density.
d = dfS.groupby(["Département"])['Commune'].count()
s = dfS.groupby(["Département"])['score'].mean()
# Hard-coded areas in km², indexed by department code.
sup = pd.Series([6809,7172,5175,6206,6720] , index =[44, 49, 53, 72, 85], name='superficie km²')
# Number of communes per 100 km².
dens = pd.Series(d/sup*100, name='Communes/100km²')
d = pd.concat([d, s, sup, dens], axis=1).round(1)
print(d.to_markdown())
display(d)

|    |   Commune |   score |   superficie km² |   Communes/100km² |
|---:|----------:|--------:|-----------------:|------------------:|
| 44 |       207 |    45.8 |             6809 |               3   |
| 49 |       177 |    52.4 |             7172 |               2.5 |
| 53 |       242 |    60.1 |             5175 |               4.7 |
| 72 |       354 |    58.5 |             6206 |               5.7 |
| 85 |       258 |    60.5 |             6720 |               3.8 |


# Analysis by department: mean of the global score and of every indicator score.
# The columns are selected with a list: indexing a GroupBy with several bare
# keys (an implicit tuple) is deprecated and rejected by recent pandas versions.
d = dfS.groupby(["Département"])[[
    'score', 'score_60_74', 'score_75+', 'score_15_29', 'score_0dipl',
    'score_faibldipl', 'score_revmed', 'score_foyer1', 'score_ind',
    'score_empl', 'score_ouv',
]].mean().round(1)
display(d)
print(d.to_markdown())

/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:2: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.

|   Département |   score |   score_60_74 |   score_75+ |   score_15_29 |   score_0dipl |   score_faibldipl |   score_revmed |   score_foyer1 |   score_ind |   score_empl |   score_ouv |
|--------------:|--------:|--------------:|------------:|--------------:|--------------:|------------------:|---------------:|---------------:|------------:|-------------:|------------:|
|            44 |    45.8 |          31.2 |        25.1 |          26.1 |          29.1 |              36.6 |           55.4 |           50.3 |        21.1 |         33.6 |        32.8 |
|            49 |    52.4 |          33.3 |        28.6 |          24.9 |          38.1 |              32.4 |           62.1 |           48   |        20.7 |         31.4 |        35.1 |
|            53 |    60.1 |          31   |        31.1 |          25.6 |          42.2 |              35.6 |           69.8 |           50.7 |        19.3 |         28.7 |        39.1 |
|            72 |    58.5 |          33.7 |        27.4 |          25.3 |          39.1 |              40.9 |           62.4 |           49.7 |        22.1 |         31.6 |        38.3 |
|            85 |    60.5 |          35.6 |        30.6 |          25.4 |          37.1 |              40   |           65.3 |           51.9 |        22.5 |         31.7 |        38.2 |


# Mean of the global score and of every indicator score per demographic
# environment. The columns are selected with a list: indexing a GroupBy with
# several bare keys (an implicit tuple) is deprecated and rejected by recent
# pandas versions.
d = dfS.groupby(["Environnement Démographique"])[[
    'score', 'score_60_74', 'score_75+', 'score_15_29', 'score_0dipl',
    'score_faibldipl', 'score_revmed', 'score_foyer1', 'score_ind',
    'score_empl', 'score_ouv',
]].mean().round(1)
display(d)
print(d.to_markdown())

/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:1: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.
  """Entry point for launching an IPython kernel.

| Environnement Démographique   |   score |   score_60_74 |   score_75+ |   score_15_29 |   score_0dipl |   score_faibldipl |   score_revmed |   score_foyer1 |   score_ind |   score_empl |   score_ouv |
|:------------------------------|--------:|--------------:|------------:|--------------:|--------------:|------------------:|---------------:|---------------:|------------:|-------------:|------------:|
| Bassin Industriel             |    60.9 |          33.2 |        28.2 |          25.6 |          40.4 |              38.8 |           66.4 |           51.3 |        20.2 |         29.7 |        41.3 |
| Bassin Résidentiel            |    57.1 |          35.7 |        30.9 |          23.4 |          36.4 |              40   |           63.6 |           52.4 |        23.5 |         32.4 |        33.1 |
| Bassin Urbain                 |    43.6 |          33.7 |        25   |          25.6 |          31.1 |              33.8 |           51.9 |           47.2 |        20.6 |         33.9 |        30.4 |
| Bassin diversifié             |    58.2 |          28.9 |        26.7 |          28.3 |          37.4 |              38.8 |           66.7 |           47.7 |        21.2 |         31.2 |        43.5 |
| Bassins Agroalimentaire       |    61.9 |          31.2 |        32.9 |          25.8 |          43.1 |              36   |           69.8 |           50.8 |        20.8 |         29.4 |        38.4 |


# Mean of the global score and of every indicator score per (demographic
# environment, department) pair. The columns are selected with a list:
# indexing a GroupBy with several bare keys (an implicit tuple) is deprecated
# and rejected by recent pandas versions.
d = dfS.groupby(["Environnement Démographique", "Département"])[[
    'score', 'score_60_74', 'score_75+', 'score_15_29', 'score_0dipl',
    'score_faibldipl', 'score_revmed', 'score_foyer1', 'score_ind',
    'score_empl', 'score_ouv',
]].mean().round(1)
display(d)
print(d.to_markdown())

/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:1: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.
  """Entry point for launching an IPython kernel.

|                                 |   score |   score_60_74 |   score_75+ |   score_15_29 |   score_0dipl |   score_faibldipl |   score_revmed |   score_foyer1 |   score_ind |   score_empl |   score_ouv |
|:--------------------------------|--------:|--------------:|------------:|--------------:|--------------:|------------------:|---------------:|---------------:|------------:|-------------:|------------:|
| ('Bassin Industriel', 44)       |    48.7 |          29.2 |        24.1 |          26.8 |          32.2 |              37.1 |           60   |           47   |        21.1 |         34   |        37.1 |
| ('Bassin Industriel', 49)       |    58.7 |          33.4 |        28.2 |          25   |          42.2 |              33   |           68.2 |           49.2 |        18.7 |         30.2 |        40.9 |
| ('Bassin Industriel', 53)       |    63.8 |          33.3 |        32.4 |          24.2 |          44.7 |              36.9 |           71.8 |           53.3 |        18.2 |         26.6 |        39.5 |
| ('Bassin Industriel', 72)       |    67   |          36.5 |        28.1 |          24.5 |          43.7 |              43   |           66.5 |           53.9 |        22.3 |         29.3 |        42.1 |
| ('Bassin Industriel', 85)       |    60.2 |          31.1 |        27.2 |          28.6 |          37   |              38.5 |           65.2 |           49.8 |        19.1 |         29.7 |        46.3 |
| ('Bassin Résidentiel', 44)      |    48.6 |          33   |        25.8 |          25.2 |          28.7 |              39.5 |           59   |           51.3 |        21.8 |         34.2 |        32.9 |
| ('Bassin Résidentiel', 49)      |    52.1 |          31.8 |        26.7 |          26   |          36.1 |              32.6 |           63.3 |           49.3 |        24   |         29.9 |        36.6 |
| ('Bassin Résidentiel', 53)      |    76.6 |          34.8 |        46.5 |          18.2 |          49.2 |              37.2 |           75.8 |           67   |        18.7 |         29.9 |        36.2 |
| ('Bassin Résidentiel', 72)      |    56.3 |          32.2 |        29   |          24.6 |          38.1 |              40.4 |           63.7 |           49.3 |        22.3 |         32.8 |        35.1 |
| ('Bassin Résidentiel', 85)      |    63.3 |          41.4 |        35.8 |          20.9 |          39.3 |              41.1 |           66.1 |           55.5 |        25.9 |         31.4 |        30.7 |
| ('Bassin Urbain', 44)           |    36.1 |          32.6 |        25.8 |          26.1 |          24.2 |              30.9 |           41   |           53.6 |        21   |         32   |        24.4 |
| ('Bassin Urbain', 49)           |    47.9 |          35.5 |        27.6 |          24.4 |          34.9 |              31.9 |           55.9 |           48   |        22.7 |         32.4 |        30.8 |
| ('Bassin Urbain', 53)           |    44.7 |          28.8 |        21.8 |          27.1 |          32.3 |              33.8 |           58.1 |           43.6 |        16.4 |         36   |        37.7 |
| ('Bassin Urbain', 72)           |    41.7 |          36.1 |        23.9 |          24.3 |          32.4 |              36.9 |           48.9 |           43   |        19.7 |         34.6 |        27.7 |
| ('Bassin Urbain', 85)           |    47.9 |          33   |        23.9 |          27.8 |          29.1 |              37.4 |           57.7 |           48.1 |        22.2 |         36.5 |        34.3 |
| ('Bassin diversifié', 44)       |    49.5 |          25   |        22.1 |          28   |          31.1 |              40   |           64.1 |           49.3 |        17.5 |         35.4 |        39.5 |
| ('Bassin diversifié', 49)       |    56   |          30.2 |        30.2 |          25.9 |          41   |              31.9 |           66.7 |           47.5 |        18.4 |         32.3 |        38.5 |
| ('Bassin diversifié', 53)       |    56.6 |          26.8 |        25.5 |          30.6 |          36.2 |              37.6 |           69.1 |           46.5 |        21.3 |         29.5 |        44.3 |
| ('Bassin diversifié', 72)       |    59.8 |          29.2 |        26.1 |          28.3 |          37.3 |              42   |           63.5 |           46.9 |        22.8 |         30   |        46.5 |
| ('Bassin diversifié', 85)       |    63.5 |          32.3 |        28.2 |          27.1 |          38.6 |              41.6 |           69.8 |           50.7 |        22.5 |         33.6 |        43.3 |
| ('Bassins Agroalimentaire', 44) |    56.6 |          30.7 |        28.9 |          23.6 |          40.2 |              35.4 |           62.2 |           51.1 |        18.7 |         29.9 |        40.4 |
| ('Bassins Agroalimentaire', 49) |    53.2 |          31.4 |        30.6 |          24.4 |          39.4 |              33.5 |           64.9 |           46.8 |        19   |         30.2 |        34.8 |
| ('Bassins Agroalimentaire', 53) |    66.2 |          31.8 |        38.1 |          23.6 |          49.2 |              33.1 |           74.9 |           53.3 |        21.5 |         25.9 |        35.8 |
| ('Bassins Agroalimentaire', 72) |    64   |          30.5 |        28.5 |          28.8 |          40.7 |              40.9 |           68.8 |           52.4 |        24.1 |         34.7 |        40.8 |
| ('Bassins Agroalimentaire', 85) |    61.3 |          30.3 |        28.2 |          30.4 |          36.1 |              41   |           65.9 |           48.3 |        18.3 |         31.4 |        46.2 |


import folium
import geopandas as gpd
def choroplethInsee(m, variable):
    """Add a choropleth of ``variable`` and a hover tooltip layer to a folium map.

    The commune outlines are read from a GeoJSON file and the values come from
    the module-level DataFrame ``dfS``, joined on the commune code. A layer
    control is added to the map on every call.

    Args:
        m: folium map to draw on; it is modified in place.
        variable: Name of the ``dfS`` column to display.

    Returns:
        The map ``m``.
    """
    contours_path = '../data/Contourgeocode/a_com2020_PDL_geojson.json'
    communes = gpd.read_file(contours_path)
    # The codes are cast to int so that they can be matched with dfS['CODGEO'].
    communes['codgeo'] = communes['codgeo'].astype('int')

    choropleth = folium.Choropleth(
        geo_data=communes,
        name='score précarité numérique',
        data=dfS,
        columns=['CODGEO', variable],
        key_on='feature.properties.codgeo',
        fill_color='RdYlBu_r',
        fill_opacity=0.8,
        line_opacity=0.2,
        legend_name='DYN SetC',
        smooth_factor=0
    )
    choropleth.add_to(m)

    # Attach the displayed value to every outline so that the tooltip can show it.
    values = dfS[['CODGEO', variable]]
    communes_with_values = communes.merge(values, left_on='codgeo', right_on='CODGEO')

    def style_function(x):
        return {'fillColor': '#ffffff', 'color':'#000000', 'fillOpacity': 0.1, 'weight': 0.1}

    def highlight_function(x):
        return {'fillColor': '#000000', 'color':'#000000', 'fillOpacity': 0.50, 'weight': 0.1}

    hover_tooltip = folium.features.GeoJsonTooltip(
        fields=['libgeo', variable],
        aliases=['',variable + ":"],
        style=("background-color: white; color: #333333; font-family: arial; font-size: 12px; padding: 10px;")
    )
    # Nearly transparent overlay whose only purpose is to carry the tooltips.
    tooltips = folium.features.GeoJson(
        communes_with_values,
        style_function=style_function,
        control=False,
        highlight_function=highlight_function,
        tooltip=hover_tooltip
    )
    m.add_child(tooltips)
    m.keep_in_front(tooltips)
    folium.LayerControl().add_to(m)

    return m
# Base map on OpenStreetMap tiles.
m = folium.Map(location=[47.4791129516, -0.814151724722], zoom_start=8, tiles='OpenStreetMap', max_bounds=True)
# choroplethInsee reads its values from dfS, so the median-income column is
# copied from df into dfS before drawing it.
dfS['MED17'] = df['MED17']
m = choroplethInsee(m, 'MED17')
# Bare expression: renders the map in a notebook, no effect otherwise.
m

	CODGEO	Commune	Département	Environnement Démographique	score	score_60_74	score_75+	score_15_29	score_0dipl	score_faibldipl	score_revmed	score_foyer1	score_ind	score_empl	score_ouv
980	85001	L'Aiguillon-sur-Mer	85	Bassin Résidentiel	82.5	58.0	65.9	6.1	53.6	42.3	64.6	76.1	21.3	15.3	14.5
981	85002	L'Aiguillon-sur-Vie	85	Bassin Résidentiel	68.4	37.7	34.7	25.7	35.8	44.6	65.2	58.3	24.6	36.7	38.8
982	85003	Aizenay	85	Bassin Industriel	50.9	30.5	22.3	31.4	36.5	32.6	59.3	47.5	22.4	36.7	36.2
983	85004	Angles	85	Bassin Résidentiel	64.9	69.9	41.5	13.2	42.0	48.9	57.7	61.5	15.0	25.5	10.6
984	85005	Antigny	85	Bassin Industriel	69.8	38.4	25.3	34.0	43.3	40.7	69.5	44.3	26.4	31.0	49.4
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
1233	85303	Vix	85	Bassin Résidentiel	74.4	32.9	37.0	25.2	51.7	39.0	72.1	56.8	31.6	28.6	38.3
1234	85304	Vouillé-les-Marais	85	Bassin Résidentiel	56.5	43.7	23.7	16.7	43.7	40.9	80.4	52.9	14.8	29.2	28.5
1235	85305	Vouvant	85	Bassin Résidentiel	61.4	40.4	38.4	14.6	46.1	27.3	66.6	50.1	37.4	27.0	32.8
1236	85306	Xanton-Chassenon	85	Bassin Résidentiel	37.5	28.8	6.9	24.7	34.8	37.0	65.7	31.6	20.9	49.1	35.5
1237	85307	La Faute-sur-Mer	85	Bassin Résidentiel	70.5	52.6	61.1	5.5	39.2	38.9	49.1	73.8	59.8	20.8	9.4

Calcul d'indicateurs de précarité numérique

Calcul d'un score à partir des indicateurs calculés précédemment

Analyse de distribution du score de précarité

Analyses par Département et Bassin

Sandbox

	score_60_74	score_75+	score_15_29	score_0dipl	score_faibldipl	score_revmed	score_foyer1	score_ind	score_empl	score_ouv
moyenne	33.1	28.5	25.5	37.5	37.8	63.2	50.2	21.3	31.4	37.1
écart type	10.6	13.3	10.7	12.1	8.4	13.9	12.3	12.7	8.9	12.9
25%	26.9	20.2	19.2	28.8	32.6	55.9	41.7	14.0	26.4	28.5
50%	32.0	26.7	25.5	36.2	37.8	65.2	49.3	19.8	31.5	36.9
75%	38.0	35.8	30.8	44.5	42.5	72.1	57.6	26.8	36.3	44.5

	Commune	score	superficie km²	Communes/100km²
44	207	45.8	6809	3.0
49	177	52.4	7172	2.5
53	242	60.1	5175	4.7
72	354	58.5	6206	5.7
85	258	60.5	6720	3.8

	score	score_60_74	score_75+	score_15_29	score_0dipl	score_faibldipl	score_revmed	score_foyer1	score_ind	score_empl	score_ouv
Département
44	45.8	31.2	25.1	26.1	29.1	36.6	55.4	50.3	21.1	33.6	32.8
49	52.4	33.3	28.6	24.9	38.1	32.4	62.1	48.0	20.7	31.4	35.1
53	60.1	31.0	31.1	25.6	42.2	35.6	69.8	50.7	19.3	28.7	39.1
72	58.5	33.7	27.4	25.3	39.1	40.9	62.4	49.7	22.1	31.6	38.3
85	60.5	35.6	30.6	25.4	37.1	40.0	65.3	51.9	22.5	31.7	38.2

	score	score_60_74	score_75+	score_15_29	score_0dipl	score_faibldipl	score_revmed	score_foyer1	score_ind	score_empl	score_ouv
Environnement Démographique
Bassin Industriel	60.9	33.2	28.2	25.6	40.4	38.8	66.4	51.3	20.2	29.7	41.3
Bassin Résidentiel	57.1	35.7	30.9	23.4	36.4	40.0	63.6	52.4	23.5	32.4	33.1
Bassin Urbain	43.6	33.7	25.0	25.6	31.1	33.8	51.9	47.2	20.6	33.9	30.4
Bassin diversifié	58.2	28.9	26.7	28.3	37.4	38.8	66.7	47.7	21.2	31.2	43.5
Bassins Agroalimentaire	61.9	31.2	32.9	25.8	43.1	36.0	69.8	50.8	20.8	29.4	38.4