Python Crash Courses: Intro to Data¶
- Review
- Pandas
- Exploring a DataFrame
- Indexing
- Series
- Working Example: Mass Mobilization Data
- About the dataset
- Setup
- Research Questions
- Question 1: What information has been collected about the protests in the dataset?
- Question 2: How many protests does our dataset include and in which countries do they occur?
- Question 3: Are length and size of protests different in various regions of the world?
- Question 4: Are there certain time periods with greater or fewer protests?
- Learn more
Review¶
Packages¶
Installation¶
Packages provide additional tools and functions not present in base Python. Python includes a number of packages to start with, and others can be installed by running pip install <package name> or conda install <package name> in your terminal.
Open your terminal by:
- (PC) Start > Miniforge3 > Miniforge Prompt
- (Mac) Finder > Applications > Utilities > Terminal
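For example, to install pandas (used later in this lesson), you could run either of the following at the prompt, depending on whether you use pip or conda:
pip install pandas
conda install pandas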
Loading¶
Once you've installed a package, you can load it into your current Python session with the import statement. Otherwise, its functions will not be available.
import os #functions for working with your operating system
import shutil #extra functions for working with files
Basic File Paths¶
To learn more about File Paths (including an exercise using loops and conditionals), check out the extended version of this section.
Working Directories¶
To open a file with Python, you'll need to tell your computer where it's located on your computer. You can specify the entire absolute filepath (starting with C:\ on PC or / on Mac), or you can set a working directory and work with relative file paths.
You can determine where a file is located on your computer by:
- (PC) Navigate to your desired folder in Windows Explorer and click on it. Click in the address bar at the top of the window to copy the path.
- (Mac) Right-click a file in your desired directory > Click Get Info > Highlight and copy the path listed next to "Where:"
- (Alternate Mac) Right-click a file in your desired directory > Hold down the Option key > Click Copy "file_name" as Pathname.
If a file is located in your working directory, its relative path is just the name of the file!
myfile="C:/Users/mtjansen/Desktop/Python/Files_Packages/Recipes.zip" #absolute path
os.path.isfile(myfile) #check if Python can find my file
True
Note: Paths in Jupyter and Jupyter Lab
Jupyter Notebooks automatically set your working directory to the folder where the .ipynb is saved. You'll have to save the document at least once to set your directory.
For example, the csv (comma separated values) file "Provisional_COVID-19_Death_Counts_by_Sex__Age__and_Week.csv" is not stored in the same folder as this notebook, so Python can't find it when we give only the file name without any directory information.
os.path.isfile("Provisional_COVID-19_Death_Counts_by_Sex__Age__and_Week.csv")
False
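If you're ever unsure which folder your notebook is using as its working directory, you can check it with os.getcwd() (assuming os has been imported as above):
print(os.getcwd()) #show the current working directory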
Windows Paths
Windows filepaths use backslashes (\), which Python interprets as escape characters. This can be fixed in several ways:
- Replace \ with /.
- Replace \ with \\.
- Preface your path with r to create a "raw" string:
r"C:\Users\mtjansen\Desktop"
os.chdir("C:/Users/mtjansen/Desktop/Python/Jupyter") #set working directory
myfile="CountyHealthData_2014-2015.csv" #relative path
os.path.isfile(myfile)
True
We can get a list of all files in the working directory with os.listdir(".").
print(os.listdir(".")) #entire directory
print([path for path in os.listdir("C:/Users/mtjansen/Desktop/Python/Jupyter") if path[-3:]=="csv"]) #alternatively we can specify a folder and use a list comprehension to get a certain file type.
['.ipynb_checkpoints', 'CountyHealthData_2014-2015.csv', 'Jupyter-Notebooks.html', 'Jupyter-Notebooks.ipynb', 'Other-Packages.html', 'Other-Packages.ipynb', 'output_56_1.png', 'output_58_1.png', 'output_60_0.png', 'output_60_1.png', 'output_63_1.png', 'output_64_1.png', 'output_66_1.png', 'output_81_1.png', 'output_83_1.png', 'Pandas-nocontents.html', 'Pandas.html', 'Pandas.ipynb', 'PandasExercises.html', 'PandasExercises.ipynb', 'PandasSolutions.html', 'PandasSolutions.ipynb', 'Pandas_backup.ipynb', 'protest_data.csv', 'UNC_logo.png', 'Untitled.ipynb'] ['CountyHealthData_2014-2015.csv', 'protest_data.csv']
Pandas¶
Like spreadsheets in Microsoft Excel or dataframes in R, Pandas allows us to store our data in tabular, two-dimensional objects (dataframes) with familiar features like rows, columns, and headers. This is useful because it makes management, manipulation, and cleaning of large datasets much easier than would be the case using Python's built-in data structures such as lists and dictionaries. Pandas also provides a wide range of useful tools for working with data once it has been stored and structured.
Notice that we load pandas with the usual import pandas plus an extra as pd statement. This allows us to call functions from pandas with pd.<function> instead of pandas.<function> for convenience. The as pd alias is not necessary to load the package.
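As a quick illustration before we load a real file (a toy example, not part of the lesson's dataset), a DataFrame can also be built directly from a dictionary of columns:
import pandas as pd
toy = pd.DataFrame({"county": ["Orange", "Durham"], "year": [2014, 2015]})
print(toy) #a small table with two rows, two named columns, and a numeric row index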
Download the csv file CountyHealthData_2014-2015.csv. I've stored my copy in the same folder as this Jupyter Notebook. Remember that Jupyter Notebooks automatically set your working directory to the folder where the .ipynb is saved. You'll have to save the document at least once to set your directory, but once there you can use relative paths.
pd.read_csv
reads the tabular data from a Comma Separated Values (csv) file into a DataFrame
object.
import pandas as pd
df = pd.read_csv("CountyHealthData_2014-2015.csv")
#df = pd.read_csv("C:/Users/mtjansen/Desktop/Python/Jupyter/CountyHealthData_2014-2015.csv")
#df = pd.read_csv("https://github.com/UNC-Libraries-data/Python/raw/main/Jupyter/CountyHealthData_2014-2015.csv")
Attributes¶
A good first step in understanding our DataFrame is to examine some of its basic attributes. Attributes contain values that help us understand and use the dataframe. In pandas, we access attributes with the following syntax:
<DataFrame name>.<attribute name>
Here we use the .shape
attribute to determine how many rows and columns (in that order) are available. .size
gives us the number of cells in the dataframe (rows * columns).
df.shape
(6109, 64)
df.size == 6109 * 64
True
Other useful attributes include:
- .columns provides the column names for the DataFrame
- .dtypes provides the pandas datatype for each column
We'll also use attributes (.loc
and .iloc
) to index our DataFrames later on.
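For example, we can take a quick look at .columns and .dtypes for our DataFrame (a quick check):
print(df.columns[:5])   #first five column names
print(df.dtypes.head()) #data types of the first five columns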
Methods¶
Much of the functionality for working with dataframes comes in the form of methods. Methods are specialized functions that only work for a certain type of object, with the syntax:
<object name>.<method>()
We can look at the first 5 or last 5 rows in the dataset directly with the .head()
and .tail()
methods.
df.head()
State | Region | Division | County | FIPS | GEOID | SMS Region | Year | Premature death | Poor or fair health | ... | Drug poisoning deaths | Uninsured adults | Uninsured children | Health care costs | Could not see doctor due to cost | Other primary care providers | Median household income | Children eligible for free lunch | Homicide rate | Inadequate social support | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | AK | West | Pacific | Aleutians West Census Area | 2016 | 2016 | Insuff Data | 1/1/2014 | NaN | 0.122 | ... | NaN | 0.374 | 0.250 | 3791.0 | 0.185 | 216.0 | 69192 | 0.127 | NaN | 0.287 |
1 | AK | West | Pacific | Aleutians West Census Area | 2016 | 2016 | Insuff Data | 1/1/2015 | NaN | 0.122 | ... | NaN | 0.314 | 0.176 | 4837.0 | 0.185 | 254.0 | 74088 | 0.133 | NaN | NaN |
2 | AK | West | Pacific | Anchorage Borough | 2020 | 2020 | Region 22 | 1/1/2014 | 6827.0 | 0.125 | ... | 15.37 | 0.218 | 0.096 | 6588.0 | 0.119 | 135.0 | 71094 | 0.319 | 6.29 | 0.160 |
3 | AK | West | Pacific | Anchorage Borough | 2020 | 2020 | Region 22 | 1/1/2015 | 6856.0 | 0.125 | ... | 17.08 | 0.227 | 0.123 | 6582.0 | 0.119 | 148.0 | 76362 | 0.334 | 5.60 | NaN |
4 | AK | West | Pacific | Bethel Census Area | 2050 | 2050 | Insuff Data | 1/1/2014 | 13345.0 | 0.211 | ... | NaN | 0.394 | 0.124 | 5860.0 | 0.200 | 169.0 | 41722 | 0.668 | 12.77 | 0.477 |
5 rows × 64 columns
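The .tail() method works the same way for the last 5 rows (output omitted here):
df.tail()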
Sometimes, our top and bottom rows aren't very representative, and we'd prefer to look at a random sample of rows to get a better sense of the data. Note that we supply the parameter n
to specify how many rows we want to sample.
df.sample(n=5)
State | Region | Division | County | FIPS | GEOID | SMS Region | Year | Premature death | Poor or fair health | ... | Drug poisoning deaths | Uninsured adults | Uninsured children | Health care costs | Could not see doctor due to cost | Other primary care providers | Median household income | Children eligible for free lunch | Homicide rate | Inadequate social support | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
3702 | NH | Northeast | New England | Hillsborough County | 33011 | 33011 | Region 12 | 1/1/2014 | 4991.0 | 0.111 | ... | 12.05 | 0.145 | 0.035 | 8805.0 | 0.095 | 86.0 | 68150 | 0.217 | 1.43 | 0.176 |
1196 | IA | Midwest | West North Central | Madison County | 19121 | 19121 | Insuff Data | 1/1/2014 | 5595.0 | 0.095 | ... | NaN | 0.115 | 0.042 | 7703.0 | 0.096 | 38.0 | 59417 | 0.228 | NaN | 0.170 |
2130 | KY | South | East South Central | Oldham County | 21185 | 21185 | Region 5 | 1/1/2015 | 5284.0 | 0.092 | ... | 7.21 | 0.122 | 0.045 | 9981.0 | 0.068 | 47.0 | 86580 | 0.167 | NaN | NaN |
1861 | KS | Midwest | West North Central | Mitchell County | 20123 | 20123 | Insuff Data | 1/1/2014 | 7818.0 | 0.092 | ... | NaN | 0.151 | 0.066 | 11400.0 | 0.099 | 47.0 | 46204 | 0.296 | NaN | 0.097 |
508 | CO | West | Mountain | Dolores County | 8033 | 8033 | Insuff Data | 1/1/2014 | NaN | 0.089 | ... | NaN | 0.279 | 0.180 | 7197.0 | NaN | 50.0 | 39254 | 0.362 | NaN | NaN |
5 rows × 64 columns
We can also get basic summary statistics for each of our columns with the .describe()
method. Normally, .describe()
would only show us statistics for numeric columns, but we'll use an argument called "include" to summarize our categorical columns as well.
df.describe(include = "all").round(2)
#.round(2) rounds all of our numbers to 2 decimal places.
State | Region | Division | County | FIPS | GEOID | SMS Region | Year | Premature death | Poor or fair health | ... | Drug poisoning deaths | Uninsured adults | Uninsured children | Health care costs | Could not see doctor due to cost | Other primary care providers | Median household income | Children eligible for free lunch | Homicide rate | Inadequate social support | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 6109 | 6109 | 6109 | 6109 | 6109.00 | 6109.00 | 6109 | 6109 | 5984.00 | 5424.00 | ... | 4113.00 | 6109.00 | 6109.00 | 6105.00 | 4755.00 | 5979.00 | 6109.00 | 5835.00 | 2484.00 | 2427.00 |
unique | 51 | 4 | 9 | 1830 | NaN | NaN | 30 | 2 | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
top | TX | South | South Atlantic | Washington County | NaN | NaN | Insuff Data | 1/1/2015 | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
freq | 469 | 2803 | 1175 | 60 | NaN | NaN | 1608 | 3062 | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
mean | NaN | NaN | NaN | NaN | 30322.71 | 30322.71 | NaN | NaN | 8029.29 | 0.17 | ... | 13.61 | 0.21 | 0.08 | 9454.27 | 0.15 | 57.34 | 45401.81 | 0.43 | 6.31 | 0.19 |
std | NaN | NaN | NaN | NaN | 15177.99 | 15177.99 | NaN | NaN | 2413.81 | 0.06 | ... | 7.40 | 0.07 | 0.04 | 1550.57 | 0.06 | 45.71 | 11603.61 | 0.17 | 4.70 | 0.05 |
min | NaN | NaN | NaN | NaN | 1001.00 | 1001.00 | NaN | NaN | 2398.00 | 0.04 | ... | 2.32 | 0.04 | 0.01 | 3791.00 | 0.03 | 0.00 | 21572.00 | 0.00 | 0.60 | 0.00 |
25% | NaN | NaN | NaN | NaN | 18163.00 | 18163.00 | NaN | NaN | 6351.50 | 0.13 | ... | 8.67 | 0.16 | 0.06 | 8379.00 | 0.11 | 31.00 | 37588.00 | 0.31 | 3.20 | 0.16 |
50% | NaN | NaN | NaN | NaN | 29133.00 | 29133.00 | NaN | NaN | 7707.50 | 0.17 | ... | 11.99 | 0.21 | 0.08 | 9380.00 | 0.15 | 48.00 | 43479.00 | 0.42 | 5.08 | 0.19 |
75% | NaN | NaN | NaN | NaN | 45067.00 | 45067.00 | NaN | NaN | 9374.25 | 0.21 | ... | 16.61 | 0.26 | 0.10 | 10412.00 | 0.19 | 72.00 | 50884.00 | 0.54 | 8.00 | 0.22 |
max | NaN | NaN | NaN | NaN | 56045.00 | 56045.00 | NaN | NaN | 25394.00 | 0.51 | ... | 79.78 | 0.52 | 0.26 | 21913.00 | 0.41 | 1348.00 | 121250.00 | 1.00 | 51.49 | 0.57 |
11 rows × 64 columns
You can get help for a method or attribute as follows:
help(pd.DataFrame.shape)
Help on property: Return a tuple representing the dimensionality of the DataFrame. See Also -------- ndarray.shape : Tuple of array dimensions. Examples -------- >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) >>> df.shape (2, 2) >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4], ... 'col3': [5, 6]}) >>> df.shape (2, 3)
A full list of attributes and methods for DataFrames is available in the documentation.
Indexing¶
We'll often want to select certain rows or columns from a large dataframe. As with elements in a list, this can be accomplished using indexing. There are some limitations, however. For example, we can use numbers in square brackets to select certain rows, but doing so always returns all the columns in our dataset:
df[0:3]
State | Region | Division | County | FIPS | GEOID | SMS Region | Year | Premature death | Poor or fair health | ... | Drug poisoning deaths | Uninsured adults | Uninsured children | Health care costs | Could not see doctor due to cost | Other primary care providers | Median household income | Children eligible for free lunch | Homicide rate | Inadequate social support | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | AK | West | Pacific | Aleutians West Census Area | 2016 | 2016 | Insuff Data | 1/1/2014 | NaN | 0.122 | ... | NaN | 0.374 | 0.250 | 3791.0 | 0.185 | 216.0 | 69192 | 0.127 | NaN | 0.287 |
1 | AK | West | Pacific | Aleutians West Census Area | 2016 | 2016 | Insuff Data | 1/1/2015 | NaN | 0.122 | ... | NaN | 0.314 | 0.176 | 4837.0 | 0.185 | 254.0 | 74088 | 0.133 | NaN | NaN |
2 | AK | West | Pacific | Anchorage Borough | 2020 | 2020 | Region 22 | 1/1/2014 | 6827.0 | 0.125 | ... | 15.37 | 0.218 | 0.096 | 6588.0 | 0.119 | 135.0 | 71094 | 0.319 | 6.29 | 0.160 |
3 rows × 64 columns
We can select rows for specific columns using the column names.
df["State"][5:10]
5 AK 6 AK 7 AK 8 AK 9 AK Name: State, dtype: object
If we want to select multiple columns, we must list them in their own nested set of square brackets.
df[["State", "Year", "County", "Uninsured adults"]][19:25]
State | Year | County | Uninsured adults | |
---|---|---|---|---|
19 | AK | 1/1/2015 | Kodiak Island Borough | 0.320 |
20 | AK | 1/1/2014 | Lake and Peninsula Borough | 0.434 |
21 | AK | 1/1/2015 | Lake and Peninsula Borough | 0.406 |
22 | AK | 1/1/2014 | Matanuska-Susitna Borough | 0.266 |
23 | AK | 1/1/2015 | Matanuska-Susitna Borough | 0.254 |
24 | AK | 1/1/2014 | Nome Census Area | 0.373 |
Typing all those names out gets tiring after a while, though. What if we try to select a column by number instead? Running the code below produces an error. This is where the attributes .iloc
and .loc
become useful.
# df[1][0:3]
If we use the .iloc attribute before our brackets, pandas accepts two numbers separated by a comma. The first number is for rows and the second for columns. Below, we select the value at row position 2 and column position 3 (the third row and fourth column, since positions start at 0).
df.iloc[2,3]
'Anchorage Borough'
We can also use a colon to select multiple rows or columns at once. Note the examples below.
df.iloc[:,3] # All rows of column 3
0 Aleutians West Census Area 1 Aleutians West Census Area 2 Anchorage Borough 3 Anchorage Borough 4 Bethel Census Area ... 6104 Uinta County 6105 Washakie County 6106 Washakie County 6107 Weston County 6108 Weston County Name: County, Length: 6109, dtype: object
df.iloc[0:3,:] # Rows 0-2 of all columns
State | Region | Division | County | FIPS | GEOID | SMS Region | Year | Premature death | Poor or fair health | ... | Drug poisoning deaths | Uninsured adults | Uninsured children | Health care costs | Could not see doctor due to cost | Other primary care providers | Median household income | Children eligible for free lunch | Homicide rate | Inadequate social support | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | AK | West | Pacific | Aleutians West Census Area | 2016 | 2016 | Insuff Data | 1/1/2014 | NaN | 0.122 | ... | NaN | 0.374 | 0.250 | 3791.0 | 0.185 | 216.0 | 69192 | 0.127 | NaN | 0.287 |
1 | AK | West | Pacific | Aleutians West Census Area | 2016 | 2016 | Insuff Data | 1/1/2015 | NaN | 0.122 | ... | NaN | 0.314 | 0.176 | 4837.0 | 0.185 | 254.0 | 74088 | 0.133 | NaN | NaN |
2 | AK | West | Pacific | Anchorage Borough | 2020 | 2020 | Region 22 | 1/1/2014 | 6827.0 | 0.125 | ... | 15.37 | 0.218 | 0.096 | 6588.0 | 0.119 | 135.0 | 71094 | 0.319 | 6.29 | 0.160 |
3 rows × 64 columns
df.iloc[120:126,2:8] # Rows 120-125 of columns 2-7
Division | County | FIPS | GEOID | SMS Region | Year | |
---|---|---|---|---|---|---|
120 | East South Central | Lamar County | 1075 | 1075 | Region 16 | 1/1/2014 |
121 | East South Central | Lamar County | 1075 | 1075 | Region 16 | 1/1/2015 |
122 | East South Central | Lauderdale County | 1077 | 1077 | Region 16 | 1/1/2014 |
123 | East South Central | Lauderdale County | 1077 | 1077 | Region 16 | 1/1/2015 |
124 | East South Central | Lawrence County | 1079 | 1079 | Region 16 | 1/1/2014 |
125 | East South Central | Lawrence County | 1079 | 1079 | Region 16 | 1/1/2015 |
The i in iloc stands for integer. If we still want to use the column names, we can use the .loc attribute instead. Note that, unlike .iloc, slices with .loc include both endpoints, so 0:3 below returns four rows.
df.loc[0:3,"Division"]
0 Pacific 1 Pacific 2 Pacific 3 Pacific Name: Division, dtype: object
df.loc[0:3,["State","County","Food environment index"]]
State | County | Food environment index | |
---|---|---|---|
0 | AK | Aleutians West Census Area | 7.002 |
1 | AK | Aleutians West Census Area | 6.600 |
2 | AK | Anchorage Borough | 8.185 |
3 | AK | Anchorage Borough | 8.000 |
Indexing in pandas can also be combined with methods. Here, we'll use the head()
method while indexing with .loc.
df.loc[:,["State","County","Health care costs"]].head()
State | County | Health care costs | |
---|---|---|---|
0 | AK | Aleutians West Census Area | 3791.0 |
1 | AK | Aleutians West Census Area | 4837.0 |
2 | AK | Anchorage Borough | 6588.0 |
3 | AK | Anchorage Borough | 6582.0 |
4 | AK | Bethel Census Area | 5860.0 |
Indexing and Assignment¶
When we want to assign a value into a cell or region of our DataFrame, it's best practice to use the .loc
or .iloc
attributes to select a region of our dataset. This ensures that when changes are made, they will flow back into the original DataFrame and not into a copy. This is a particularly common problem when using multiple indices stacked on top of each other. Unfortunately this problem is inherently unpredictable in Pandas, and can best be avoided by using the .loc
and .iloc
attributes.
Read more here.
Try running the commented-out code ex["County"][0] = "New County" below, after creating ex. Both versions may appear to work, but the commented one will raise a SettingWithCopyWarning and may sometimes fail to update the original DataFrame.
ex = df.copy()
#Do this:
ex.loc[0,"County"] = "New County"
#NOT this:
#ex["County"][0] = "New County"
print(ex.loc[0,"County"])
New County
Series¶
We can think of a DataFrame as a collection of rows and columns, where each row represents an "observation" and each column contains a specific type of information collected about each observation. In Pandas, our columns are stored as Series objects, and our DataFrames can be thought of as named collections (similar to dictionaries) of Series.
We can extract a single column in a couple of ways:
- bracket notation:
df["Region"]
- this is the most robust way to refer to Series - dot notation:
df.Region
- simpler and easier to read but not always available
all(df["Region"]==df.Region) #check if all entries of the two Series are the same.
True
In some cases, dot notation does not work! The most common situations are:
- The column name has a space, or other irregularities
- The column name is the same as an existing attribute or method
For example, in our DataFrame, df.Uninsured adults fails because Python can't tell that "Uninsured adults" is a single column name, so instead we'd use df["Uninsured adults"].
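A quick check (a minimal sketch):
print(df["Uninsured adults"].head()) #bracket notation handles names with spaces
#print(df.Uninsured adults)          #uncommenting this line would raise a SyntaxError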
Series have their own set of attributes and methods just like DataFrames. Some attributes like .dtypes
and .shape
are available for both.
print(df.Region.shape)
print(df.Region.dtypes)
(6109,) object
One of the most useful methods for categorical variables is .value_counts()
which provides a frequency table.
df.Region.value_counts()
South 2803 Midwest 2038 West 834 Northeast 434 Name: Region, dtype: int64
This can also be used on top of other attributes or methods that return series. For example, the code below shows how frequently each data type appears in our DataFrame.
df.dtypes.value_counts()
float64 54 object 6 int64 4 dtype: int64
A full list of attributes and methods for Series is available in the documentation.
To filter our dataset based on a logical condition (true or false), we will use nested square brackets. Note the example below.
- The inner statement, df["State"]=="RI", selects the "State" column and checks whether each value equals "RI"
- The outer statement, df[ ... ], uses the resulting column of True/False values to select rows
- When combined, these two commands return all of the data contained in rows where the value of the State field is equal to "RI"
df[df["State"] == "RI"]
State | Region | Division | County | FIPS | GEOID | SMS Region | Year | Premature death | Poor or fair health | ... | Drug poisoning deaths | Uninsured adults | Uninsured children | Health care costs | Could not see doctor due to cost | Other primary care providers | Median household income | Children eligible for free lunch | Homicide rate | Inadequate social support | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
4505 | RI | Northeast | New England | Bristol County | 44001 | 44001 | Region 12 | 1/1/2014 | 4543.0 | 0.087 | ... | 12.35 | 0.113 | 0.027 | 8579.0 | 0.077 | 18.0 | 64572 | 0.139 | NaN | 0.197 |
4506 | RI | Northeast | New England | Bristol County | 44001 | 44001 | Region 12 | 1/1/2015 | 3891.0 | 0.087 | ... | 11.67 | 0.110 | 0.044 | 8355.0 | 0.077 | 16.0 | 68415 | 0.155 | NaN | NaN |
4507 | RI | Northeast | New England | Kent County | 44003 | 44003 | Region 12 | 1/1/2014 | 6313.0 | 0.122 | ... | 15.55 | 0.110 | 0.027 | 9890.0 | 0.105 | 61.0 | 59682 | 0.223 | 1.44 | 0.187 |
4508 | RI | Northeast | New England | Kent County | 44003 | 44003 | Region 12 | 1/1/2015 | 6458.0 | 0.122 | ... | 18.94 | 0.114 | 0.046 | 9621.0 | 0.105 | 68.0 | 63232 | 0.230 | 1.30 | NaN |
4509 | RI | Northeast | New England | Newport County | 44005 | 44005 | Region 12 | 1/1/2014 | 4457.0 | 0.109 | ... | 12.29 | 0.119 | 0.033 | 8897.0 | 0.090 | 30.0 | 63434 | 0.222 | NaN | 0.176 |
4510 | RI | Northeast | New England | Newport County | 44005 | 44005 | Region 12 | 1/1/2015 | 4730.0 | 0.109 | ... | 10.33 | 0.118 | 0.048 | 8149.0 | 0.090 | 33.0 | 67291 | 0.230 | NaN | NaN |
4511 | RI | Northeast | New England | Providence County | 44007 | 44007 | Region 12 | 1/1/2014 | 6468.0 | 0.154 | ... | 15.67 | 0.190 | 0.049 | 9480.0 | 0.125 | 88.0 | 47601 | 0.484 | 4.10 | 0.229 |
4512 | RI | Northeast | New England | Providence County | 44007 | 44007 | Region 12 | 1/1/2015 | 6124.0 | 0.154 | ... | 17.51 | 0.182 | 0.058 | 8983.0 | 0.125 | 93.0 | 47642 | 0.487 | 3.70 | NaN |
4513 | RI | Northeast | New England | Washington County | 44009 | 44009 | Region 12 | 1/1/2014 | 4961.0 | 0.092 | ... | 10.56 | 0.105 | 0.031 | 8776.0 | 0.088 | 39.0 | 69150 | 0.172 | NaN | 0.157 |
4514 | RI | Northeast | New England | Washington County | 44009 | 44009 | Region 12 | 1/1/2015 | 4939.0 | 0.092 | ... | 13.18 | 0.107 | 0.047 | 8212.0 | 0.088 | 49.0 | 69267 | 0.177 | NaN | NaN |
10 rows × 64 columns
SettingWithCopyWarning and filtered data¶
When we use the notation above to filter a DataFrame, we may run into a SettingWithCopyWarning later in our code if we save this object and then modify it. That's because this notation creates a reference back to the original DataFrame, not a copy, unless we explicitly use the .copy() method. If you want to use the subset later on, you should create it as follows:
RI_subset = df[df["State"] == "RI"].copy()
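With the copy in hand, later assignments won't raise the warning (a minimal sketch; the "note" column is a hypothetical addition):
RI_subset.loc[:, "note"] = "New England" #hypothetical new column; safe because RI_subset is an independent copy
print(RI_subset.shape)                   #the new column exists only in the copy, not in df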
Working Example: Mass Mobilization Data¶
About the dataset¶
The Mass Mobilization project collects data on demonstrations against governments around the globe, covering protests in 162 countries outside of the US from 1990 to 2020. The project is a collaboration between researchers from Binghamton University and the University of Notre Dame, and is sponsored by the Political Instability Task Force (PITF), which is funded by the Central Intelligence Agency. For more information and links to all associated files, visit the Harvard Dataverse.
Setup¶
Download the CSV file: protest_data.csv
Be sure to put it in the same folder as your Jupyter Notebook or working directory.
import pandas as pd
protests = pd.read_csv("protest_data.csv")
# protests = pd.read_csv("https://github.com/UNC-Libraries-data/Python/raw/main/Jupyter/protest_data.csv")
Research Questions¶
Without any preparation or planning, trying to explore a dataset can be overwhelming, and you may find yourself unsure of where to begin. The first step to successful Exploratory Data Analysis (EDA) is developing a set of research questions. We will use the questions below to guide our exploration of the dataset. In some cases, we'll create visualizations to answer the questions.
- What information has been collected about the protests in the dataset?
- How many protests does our dataset include, and in which countries do they occur?
- Are length and size of protests different in various regions of the world?
- Are there certain time periods with greater or fewer protests?
Question 1: What information has been collected about the protests in the dataset?¶
As mentioned previously, we can view the columns
attribute to see what information has been collected in a dataframe.
protests.columns
Index(['id', 'country', 'ccode', 'year', 'region', 'protest', 'protestnumber', 'startday', 'startmonth', 'startyear', 'endday', 'endmonth', 'endyear', 'protesterviolence', 'location', 'participants_category', 'participants', 'protesteridentity', 'protesterdemand1', 'protesterdemand2', 'protesterdemand3', 'protesterdemand4', 'stateresponse1', 'stateresponse2', 'stateresponse3', 'stateresponse4', 'stateresponse5', 'stateresponse6', 'stateresponse7', 'sources', 'notes'], dtype='object')
Question 2: How many protests does our dataset include and in which countries do they occur?¶
Finding the total number of protests is easy enough...
len(protests)
17145
But how can we find out how many protests occurred in each country? This is a great opportunity to use the .value_counts() method. The .copy() method allows us to create an independent copy of the result instead of a reference back to the original data.
Q2 = protests.country.value_counts().copy()
Q2
United Kingdom 578 France 547 Ireland 438 Germany 364 Kenya 353 ... Germany East 11 South Sudan 10 Czechoslovakia 6 Serbia and Montenegro 5 Germany West 2 Name: country, Length: 166, dtype: int64
Now that our Q2 Series contains the count of protests for each country, there are many ways we can display the results. Maybe we'd like to see the 5 countries with the most protests and the 5 with the fewest. Perhaps we can even visualize them.
First, let's find the top 5 countries with the most and least protests. The .nlargest()
and .nsmallest()
methods become useful here.
Q2top = Q2.nlargest(5)
Q2top
United Kingdom 578 France 547 Ireland 438 Germany 364 Kenya 353 Name: country, dtype: int64
Q2bot = Q2.nsmallest(5)
Q2bot
Germany West 2 Serbia and Montenegro 5 Czechoslovakia 6 South Sudan 10 Germany East 11 Name: country, dtype: int64
Visualization¶
There are many libraries for data visualization within Python. Some of the most popular include:
- Matplotlib (basic charts)
- Seaborn (extends the functionality of Matplotlib)
- Bokeh (interactive charts)
- Plot.ly (another library for interactive charts)
We'll use each of these libraries with the exception of Plotly, since Plotly does not display as easily in Jupyter notebooks.
Let's start with matplotlib.
import matplotlib.pyplot as plt
# Below is a special Jupyter "magic" command that allows matplotlib, seaborn and other visualization libraries to display in Jupyter Notebooks
%matplotlib inline
Note that we need to specify the .index
and .values
attributes of our pandas object to tell matplotlib what to use for the x axis of our chart and the height of our bars.
# Create a bar chart showing the top 5 countries with the most protests
plt.bar(x = Q2top.index, height = Q2top.values)
plt.xlabel("Countries")
plt.ylabel("Total protests")
Text(0, 0.5, 'Total protests')
# Create a bar chart showing the 5 countries with the fewest protests
plt.bar(x = Q2bot.index, height = Q2bot.values)
plt.xlabel("Countries")
plt.ylabel("Total protests")
Text(0, 0.5, 'Total protests')
Uh oh! Our country names are so long that they overlap one another. We could always make our chart larger, but why not use a horizontal bar chart instead? Then, we don't have to worry about the length of the labels on our bars.
This time, we'll use the barh
method and specify the data for the y axis and bar widths.
plt.barh(y = Q2bot.index, width = Q2bot.values)
plt.ylabel("Countries")
plt.xlabel("Total protests")
Text(0.5, 0, 'Total protests')
Question 3: Are length and size of protests different in various regions of the world?¶
This question will be easier to answer if we subset our data to only include the columns we need. Once we've done that, we can use the .info() method to look at the data type of the columns we've selected.
Q3 = protests[["region",
"startyear", "startmonth", "startday",
"endyear", "endmonth", "endday",
"participants"]].copy()
Q3.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 17145 entries, 0 to 17144 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 region 17145 non-null object 1 startyear 15239 non-null float64 2 startmonth 15239 non-null float64 3 startday 15239 non-null float64 4 endyear 15239 non-null float64 5 endmonth 15239 non-null float64 6 endday 15239 non-null float64 7 participants 15746 non-null object dtypes: float64(6), object(2) memory usage: 1.0+ MB
The .info()
method lists the columns of our dataset along with some extra information on them. We can see how many non-missing values exist for each column, as well as each column's data type. As we'd expect, our day, month and year columns have a numeric dtype, "float64". Region has an "object" dtype because it contains text, but so does participants. The participants column is supposed to contain the number of participants for each protest, so why does it have the "object" dtype? Let's take a closer look at it.
Q3["participants"]
0 1000s 1 1000 2 500 3 100s 4 950 ... 17140 50+ 17141 About 100 17142 NaN 17143 NaN 17144 NaN Name: participants, Length: 17145, dtype: object
It looks like this column includes text. We can extract only the numbers using a regular expression. Regular expressions, or "regex" for short, are a popular method for detecting patterns in text. The .str.extract()
method in pandas allows us to capture specific text using a regular expression. In this case, we want to extract only numeric digits and remove everything else.
Q3["participants"] = Q3["participants"].str.extract("(\d+)")
Q3["participants"]
0 1000 1 1000 2 500 3 100 4 950 ... 17140 50 17141 100 17142 NaN 17143 NaN 17144 NaN Name: participants, Length: 17145, dtype: object
Note above that our dtype is still "object". To change the data type of a column, we can use the .astype() method. However, to successfully make participants an integer, we will need to remove any missing or blank values first; not doing so will give us an error. Our question can only be answered by knowing each protest's size and length anyway, so let's use the .dropna() method on our dataset to remove any rows with missing information. Then, we can change the data type of the participants column.
# remove any rows with missing information
Q3 = Q3.dropna()
# change the participants column to an integer
Q3["participants"] = Q3["participants"].astype("int64")
Q3.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 13988 entries, 0 to 17141 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 region 13988 non-null object 1 startyear 13988 non-null float64 2 startmonth 13988 non-null float64 3 startday 13988 non-null float64 4 endyear 13988 non-null float64 5 endmonth 13988 non-null float64 6 endday 13988 non-null float64 7 participants 13988 non-null int64 dtypes: float64(6), int64(1), object(1) memory usage: 983.5+ KB
Success!
Our next problem has to do with our date columns. Ultimately, we need to know how long each protest is. The good news is that pandas is capable of subtracting one date from another to provide a total number of days. The bad news is that we don't have two columns with two different dates. Instead, our dates are broken up into six separate columns.
The best way to combine our numbers into dates is to use dictionaries. We can map the keys "year", "month", and "day" to the matching columns using the dict() function. Then, pandas can create dates from those dictionaries with the pd.to_datetime() function.
# create dictionaries that combine the year, month and day columns
startdict = dict(year = Q3["startyear"], month = Q3["startmonth"], day = Q3["startday"])
enddict = dict(year = Q3["endyear"], month = Q3["endmonth"], day = Q3["endday"])
# create columns for start and end dates
Q3["startdate"] = pd.to_datetime(startdict)
Q3["enddate"] = pd.to_datetime(enddict)
# preview the new columns
Q3[["startdate", "enddate"]].head()
startdate | enddate | |
---|---|---|
0 | 1990-01-15 | 1990-01-15 |
1 | 1990-06-25 | 1990-06-25 |
2 | 1990-07-01 | 1990-07-01 |
3 | 1990-07-12 | 1990-09-06 |
4 | 1990-08-14 | 1990-08-15 |
But how many days does each protest last? To find out, we'll create a new column called "protestlength" by subtracting the start date from the end date and assigning the result. We also need to tell pandas what kind of result to show: days, weeks, or months? The .dt.days attribute of our protestlength column will give us the number of days. However, if the start date and end date of a protest are the same, the number of days will be 0, so we'll add 1 to all of our protest lengths to include the day on which the protest started.
# subtract the start date from end date and store the results in a new column
Q3["protestlength"] = Q3["enddate"] - Q3["startdate"]
# show protest length in number of days; add 1 to include the first day on which the protest occurred
Q3["protestlength"] = Q3["protestlength"].dt.days + 1
Q3.head()
region | startyear | startmonth | startday | endyear | endmonth | endday | participants | startdate | enddate | protestlength | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | North America | 1990.0 | 1.0 | 15.0 | 1990.0 | 1.0 | 15.0 | 1000 | 1990-01-15 | 1990-01-15 | 1 |
1 | North America | 1990.0 | 6.0 | 25.0 | 1990.0 | 6.0 | 25.0 | 1000 | 1990-06-25 | 1990-06-25 | 1 |
2 | North America | 1990.0 | 7.0 | 1.0 | 1990.0 | 7.0 | 1.0 | 500 | 1990-07-01 | 1990-07-01 | 1 |
3 | North America | 1990.0 | 7.0 | 12.0 | 1990.0 | 9.0 | 6.0 | 100 | 1990-07-12 | 1990-09-06 | 57 |
4 | North America | 1990.0 | 8.0 | 14.0 | 1990.0 | 8.0 | 15.0 | 950 | 1990-08-14 | 1990-08-15 | 2 |
Now that our dataframe is in order, we need to decide how to visualize it. We are interested in two numeric variables: protest length and protest size, so a scatterplot is a good choice. However, we also want to be able to compare different regions around the world. What we really need are multiple scatterplots - one for each region.
This is where the seaborn library comes in handy. Seaborn can create multiple visualizations at once using the .FacetGrid()
method. If we set the "col" argument to "region", .FacetGrid()
will give us a separate plot for each region. We can also use a different color for each plot by setting "hue" to "region" as well. "col_wrap" simply limits the number of charts that are shown on a single row of the grid.
import seaborn as sns
# create a "grid" object using the FacetGrid function.
grid = sns.FacetGrid(data = Q3, col = "region", col_wrap = 4, hue = "region")
# specify which chart we want to use on the grid and supply the variables for the x and y axis.
fig = grid.map_dataframe(sns.scatterplot, x = "protestlength", y = "participants")
# change the labels for our axes
fig.set_axis_labels("Protest Length (in days)", "Participants")
<seaborn.axisgrid.FacetGrid at 0x18d232cf040>
These plots are difficult to interpret because some of our data points are at the high ends of our scales while the rest are clustered at the lower ends. It would be easier to see the differences between the data points at the lower ends of our axes if we used logarithmic scales.
Below, we create the series of scatterplots once more, but this time, we set the x and y scale of our grid object to be logarithmic.
# create the grid object
grid = sns.FacetGrid(data = Q3, col = "region", col_wrap = 4, hue = "region")
# set both scales to logarithmic
grid.set(xscale="log", yscale="log")
# specify the chart to show on the grid
fig = grid.map_dataframe(sns.scatterplot, x = "protestlength", y = "participants")
fig.set_axis_labels("Protest Length (in days)", "Participants")
<seaborn.axisgrid.FacetGrid at 0x18d2715b6d0>
Overall, these plots seem to tell us that fewer people attend longer protests. However, that trend is more pronounced in some regions like Europe as opposed to others like the Middle East & North Africa (MENA) where very long protests are still able to draw a large attendance.
Question 4: Are there certain time periods with greater or fewer protests?¶
To answer this question, we'll focus on the protest start date and the country each protest took place in. We can grab the protest start dates from the "startdict" dictionary we created in Question 3.
A line plot is the standard way to visualize change over time. However, if we try to plot each day over a period of 30 years, our graph will become very cluttered. So, we'll change our date column to include only months using the .dt.to_period()
method. Then, we'll group our dataframe using .groupby()
to aggregate the total number of protests for each month. That will give us a much smoother line.
# select the columns we need
Q4 = protests[["id", "country"]].copy()
# create a date column from the "startdict" object we used in Question 3
Q4["date"] = pd.to_datetime(startdict)
# get only year-month dates for a smoother line
Q4["date"] = Q4["date"].dt.to_period('M')
# to get the total number of protests per month, we'll need to group by date
Q4_group = Q4.groupby("date")
Q4.head()
id | country | date | |
---|---|---|---|
0 | 201990001 | Canada | 1990-01 |
1 | 201990002 | Canada | 1990-06 |
2 | 201990003 | Canada | 1990-07 |
3 | 201990004 | Canada | 1990-07 |
4 | 201990005 | Canada | 1990-08 |
Let's use the bokeh library to create the line plot so that we can explore our results interactively. You'll notice that there is quite a bit of set up involved, as well as some extra code required for bokeh to display in Jupyter notebooks.
# load bokeh modules
from bokeh.models import *
from bokeh.application import *
from bokeh.application.handlers import *
from bokeh.layouts import *
from bokeh.io import *
from bokeh.plotting import *
# set up bokeh for working in jupyter notebooks - this uses similar functionality to %matplotlib inline
output_notebook()
# create a source object from our pandas dataframe
pdata = ColumnDataSource(Q4_group)
# set up the size of our plot and format the x axis for dates
p = figure(height = 300, width = 600, x_axis_type = "datetime")
# add a line to our plot
# we can use "id_count" to get a count of the protests for each month
p.line(source = pdata, x = "date", y = "id_count", width = 2)
# push the plot to jupyter notebooks
handler = show(p, notebook_handle = True)
push_notebook(handle = handler)
Notice the icons on the right side of the line plot. These allow us to zoom in and out so we can examine the line in detail. Use the magnifying glass icon to zoom in very close and notice what happens to the dates at the bottom. They turn from years into months! When you are zoomed in, you can also use the arrows icon to click and drag around the line.
But this plot only uses a minimal set of the interactive features available in bokeh. What if we want a dropdown menu that lets us filter the protests by country? To do that, we'll first need to create a list of all countries in the dataset. The .unique()
method returns all unique values within a column, and .tolist()
transforms those values into a list.
# create a list of countries for our drop-down menu
menu_list = Q4["country"].unique().tolist()
# sort the list alphabetically
menu_list.sort()
# preview the first 10 items on the list
menu_list[0:10]
['Afghanistan', 'Albania', 'Algeria', 'Angola', 'Argentina', 'Armenia', 'Austria', 'Azerbaijan', 'Bahrain', 'Bangladesh']
Now we need to create the menu and link it to a plot. This is a more complex process than what we've done so far. Bokeh requires us to create a specific type of function that defines how to update the chart when a new country is selected.
Click here for more information on defining functions.
Click here for more information on dynamically updating charts in bokeh.
# create a new dataframe for when the default country, Afghanistan, is shown
default = Q4[Q4["country"] == "Afghanistan"].groupby("date")
p2data = ColumnDataSource(default)
# set up the size of our plot and format the x axis for dates
p2 = figure(height = 300, width = 600, x_axis_type = "datetime")
# add a line to our plot
p2.line(source = p2data, x = "date", y = "id_count", width = 2)
# create the dropdown menu with a list of all countries in our dataframe
menu = Select(title = "Select country", value = "Afghanistan", options = menu_list)
# define what happens when a new country is selected
def update_plot(attr, old, new):
new_data = Q4[Q4["country"] == menu.value].groupby("date")
p2data.data = new_data
# update the plot when a new country is selected
menu.on_change("value", update_plot)
# set up a layout for our menu and plot
lo = layout([[menu], [p2]])
# add the layout to the notebook
def modify_doc(doc):
doc.add_root(lo)
# show the application in the notebook
handler = FunctionHandler(modify_doc)
app = Application(handler)
show(app, notebook_handle = True)
Learn more¶
- pandas provides a quick introduction here
- Python Data Science Handbook provides more detail and integration with other software.
- A full list of attributes and methods available for DataFrames is available here.
- Next Lesson: Extra Topics in Pandas:
- Merging and Reshaping datasets, Dealing with "Big Data"