import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
# Load the county health data; the relative path means the CSV must sit in
# the notebook's working directory. If it lives elsewhere, pass the full path
# instead (use a raw string, r"...", for Windows paths containing backslashes).
df = pd.read_csv(filepath_or_buffer="CountyHealthData_2014-2015.csv")


# Group the county rows by state and average every numeric column.
# numeric_only=True is needed because the frame also holds text columns
# (e.g. "County"): since pandas 2.0, mean() no longer drops those silently
# and raises a TypeError instead.
states = df.groupby(["State"]).mean(numeric_only=True)


# Order the states from the highest to the lowest mean "Adult smoking" value.
# reset_index() turns the "State" group labels back into a regular column and
# renumbers the rows from 0 in the new sorted order.
states_sorted = (
    states.sort_values("Adult smoking", ascending=False)
    .reset_index()
    .copy()
)
states_sorted.head()  # preview the first five rows


# Pick the two columns of interest, then the rows labelled 0 through 4.
# A .loc label slice includes its end point, so this shows five states.
states_sorted[["State", "Adult smoking"]].loc[0:4]


# Counties whose "Food environment index" is at least 9, reduced in the same
# step to the three columns needed for the count.
highFEIs = df.loc[
    df["Food environment index"] >= 9,
    ['State', 'County', 'Food environment index'],
].copy()
highFEIs.shape  # (rows, columns): the row count is the number of matching rows

(236, 3)


# value_counts() tallies the rows per state, ordered from most to fewest;
# head() then shows the five states at the top of that list
highFEIs["State"].value_counts().head(n=5)

VA    64
ND    33
MN    31
NY    19
MD    18
Name: State, dtype: int64


# Rows where the "Dentists" value is more than five times the
# "Primary care physicians" value; .copy() gives an independent frame
# that can be modified without touching df.
dfDent = df.loc[df["Dentists"] > df["Primary care physicians"] * 5].copy()
dfDent[["County", "State", "Dentists", "Primary care physicians", "2011 population estimate"]]


# Select the columns to display, then order the rows by "Dentists" from
# highest to lowest. Sorting keeps each row's original index label, which is
# why the index values on the left appear out of order.
dfDent[["County", "State", "Dentists"]].sort_values(by="Dentists", ascending=False)


# Add a "Dentists per capita" column: "Dentists" divided by "2011 population estimate"
dfDent["Dentists per capita"] = dfDent["Dentists"] / dfDent["2011 population estimate"]

# Order the rows from highest to lowest "Dentists per capita",
# then show only the identifying columns alongside the new value
dfDent.sort_values(by="Dentists per capita", ascending=False)[["County", "State", "Dentists per capita"]]


# Keep only the rows whose State is "NC", as an independent copy of df
NCdf = df.loc[df["State"] == "NC"].copy()


# Show the NC rows from highest to lowest "Adult obesity", next to "Physical inactivity"
NCdf[["County", "State", "Adult obesity", "Physical inactivity"]].sort_values(by="Adult obesity", ascending=False)


# Scatter plot of the NC rows: "Adult obesity" on the x-axis against
# "Physical inactivity" on the y-axis, with matching axis labels
plt.scatter(NCdf["Adult obesity"], NCdf["Physical inactivity"])
plt.xlabel(xlabel="Adult obesity")
plt.ylabel(ylabel="Physical inactivity")

Text(0, 0.5, 'Physical inactivity')


# Filter with Series.isin(values): a row is kept only when its State, County
# and Year each appear in the corresponding list below. The second argument
# to .loc limits the display to the relevant columns.
stateList = ["NC"]
countyList = ["Orange County"]
yearList = ["1/1/2015"]
df.loc[
    df["State"].isin(stateList) & df["County"].isin(countyList) & df["Year"].isin(yearList),
    ["County", "State", "Some college"],
]


# Same isin() filter, now with two states in the list: rows are kept when
# State is "NC" or "CA", County is "Orange County" and Year is "1/1/2015".
stateList = ["NC", "CA"]
countyList = ["Orange County"]
yearList = ["1/1/2015"]
df.loc[
    df["State"].isin(stateList) & df["County"].isin(countyList) & df["Year"].isin(yearList),
    ["County", "State", "Some college"],
]


# Mean of "Some college" over every row in df (mean() skips missing values by default)
df.loc[:, "Some college"].mean()

0.552274021934851

	State	FIPS	GEOID	Premature death	Poor or fair health	Poor physical health days	Poor mental health days	Low birthweight	Adult smoking	Adult obesity	...	Drug poisoning deaths	Uninsured adults	Uninsured children	Health care costs	Could not see doctor due to cost	Other primary care providers	Median household income	Children eligible for free lunch	Homicide rate	Inadequate social support
0	KY	21120.000000	21120.000000	9926.033333	0.242052	5.401681	4.805405	0.093389	0.285237	0.333404	...	22.446202	0.219096	0.067854	10945.350000	0.195421	70.008368	38807.483333	0.514336	5.312029	0.207121
1	AK	2162.130435	2162.130435	9642.682927	0.151773	3.569565	2.617391	0.055750	0.272043	0.303391	...	15.774286	0.328652	0.154565	7103.239130	0.158609	122.108696	56804.978261	0.408667	9.352500	0.266545
2	WV	54055.000000	54055.000000	9540.600000	0.216655	4.825926	4.214545	0.091545	0.267182	0.339591	...	23.848161	0.232855	0.051100	9915.918182	0.181491	73.296296	38279.600000	0.463459	6.454250	0.194283
3	TN	47095.000000	47095.000000	9816.500000	0.228440	4.968539	3.793590	0.089105	0.259626	0.325395	...	18.772343	0.213389	0.060963	10840.742105	0.199329	73.563830	39572.068421	0.519016	6.212523	0.188288
4	MO	29117.235808	29117.235808	8482.240175	0.190306	4.199083	4.035165	0.076140	0.254155	0.320633	...	14.325878	0.219498	0.089410	9561.834061	0.164266	46.031963	40041.275109	0.419683	6.001129	0.180250

	State	Adult smoking
0	KY	0.285237
1	AK	0.272043
2	WV	0.267182
3	TN	0.259626
4	MO	0.254155

	County	State	Dentists	Primary care physicians	2011 population estimate
378	Glenn County	CA	71.0	14.0	27992
379	Glenn County	CA	72.0	14.0	27940
509	Dolores County	CO	49.0	0.0	2029
801	Chattahoochee County	GA	358.0	61.0	12842
1312	Fremont County	ID	52.0	8.0	12957
1313	Fremont County	ID	54.0	8.0	12927
1374	Cass County	IL	52.0	7.0	13338
1375	Cass County	IL	53.0	7.0	13324
2053	Jackson County	KY	45.0	7.0	13331
2054	Jackson County	KY	45.0	8.0	13427
3578	Deuel County	NE	103.0	0.0	1937
3597	Garden County	NE	53.0	0.0	1902

	County	State	Dentists
801	Chattahoochee County	GA	358.0
3578	Deuel County	NE	103.0
379	Glenn County	CA	72.0
378	Glenn County	CA	71.0
1313	Fremont County	ID	54.0
1375	Cass County	IL	53.0
3597	Garden County	NE	53.0
1312	Fremont County	ID	52.0
1374	Cass County	IL	52.0
509	Dolores County	CO	49.0
2053	Jackson County	KY	45.0
2054	Jackson County	KY	45.0

	County	State	Dentists per capita
3578	Deuel County	NE	0.053175
801	Chattahoochee County	GA	0.027877
3597	Garden County	NE	0.027865
509	Dolores County	CO	0.024150
1313	Fremont County	ID	0.004177
1312	Fremont County	ID	0.004013
1375	Cass County	IL	0.003978
1374	Cass County	IL	0.003899
2053	Jackson County	KY	0.003376
2054	Jackson County	KY	0.003351
379	Glenn County	CA	0.002577
378	Glenn County	CA	0.002536

Pandas Exercises (Solutions)

Setup

Exercises

	County	State	Adult obesity	Physical inactivity
3397	Robeson County	NC	0.407	0.372
3308	Edgecombe County	NC	0.397	0.303
3398	Robeson County	NC	0.395	0.348
3326	Halifax County	NC	0.392	0.346
3325	Halifax County	NC	0.379	0.339
...	...	...	...	...
3251	Ashe County	NC	0.225	0.277
3331	Henderson County	NC	0.219	0.235
3332	Henderson County	NC	0.218	0.231
3378	Orange County	NC	0.213	0.158
3377	Orange County	NC	0.208	0.160