Decision Trees Intro¶
Part 1: The Restaurant Dataset¶
import pandas as pd
# This will get the dataset
# It's a good practice to go ahead and download it (curl/wget)
# and change this cell to read locally
df = pd.read_csv("https://aet-cs.github.io/white/ML/lessons/restaurant.csv")
df
|    | Alt | Bar | Fri | Hun | Pat  | Price | Rain | Res | Type    | Est   | Wait |
|----|-----|-----|-----|-----|------|-------|------|-----|---------|-------|------|
| 0  | Yes | No  | No  | Yes | Some | $$$   | No   | Yes | French  | 0-10  | Yes  |
| 1  | Yes | No  | No  | Yes | Full | $     | No   | No  | Thai    | 30-60 | No   |
| 2  | No  | Yes | No  | No  | Some | $     | No   | No  | Burger  | 0-10  | Yes  |
| 3  | Yes | No  | Yes | Yes | Full | $     | No   | No  | Thai    | 10-30 | Yes  |
| 4  | Yes | No  | Yes | No  | Full | $$$   | No   | Yes | French  | >60   | No   |
| 5  | No  | Yes | No  | Yes | Some | $$    | Yes  | Yes | Italian | 0-10  | Yes  |
| 6  | No  | Yes | No  | No  | None | $     | Yes  | No  | Burger  | 0-10  | No   |
| 7  | No  | No  | No  | Yes | Some | $$    | Yes  | Yes | Thai    | 0-10  | Yes  |
| 8  | No  | Yes | Yes | No  | Full | $     | Yes  | No  | Burger  | >60   | No   |
| 9  | Yes | Yes | Yes | Yes | Full | $$$   | No   | Yes | Italian | 10-30 | No   |
| 10 | No  | No  | No  | No  | None | $     | No   | No  | Thai    | 0-10  | No   |
| 11 | Yes | Yes | Yes | Yes | Full | $     | No   | No  | Burger  | 30-60 | Yes  |
Check out the documentation for Decision Tree Classifiers and implement one for the Restaurant dataset. Print out your decision tree and its accuracy. (It's a small dataset, so using all the data for training is OK.) Unfortunately, the scikit-learn tree classifiers require numerical data, so we will label-encode our dataset first. (One-hot encoding is also a possibility.)
from sklearn import preprocessing
le = preprocessing.LabelEncoder()
for c in df.columns:
    le.fit(df[c])
    df[c] = le.transform(df[c])
## Create your training X and y (you can use the whole dataset)
## use scikit-learn to make a decision tree
## calculate its accuracy and metrics
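One possible sketch of a solution is below. The dataset is reproduced inline from the table above so the cell runs standalone; in the notebook you can reuse the encoded `df` from the previous cell instead. The choices of `random_state=0` and `export_text` for printing the tree are ours, not requirements.

```python
import pandas as pd
from sklearn import preprocessing
from sklearn.tree import DecisionTreeClassifier, export_text
from sklearn.metrics import accuracy_score

# The restaurant dataset, copied from the table above
data = {
    "Alt":   ["Yes","Yes","No","Yes","Yes","No","No","No","No","Yes","No","Yes"],
    "Bar":   ["No","No","Yes","No","No","Yes","Yes","No","Yes","Yes","No","Yes"],
    "Fri":   ["No","No","No","Yes","Yes","No","No","No","Yes","Yes","No","Yes"],
    "Hun":   ["Yes","Yes","No","Yes","No","Yes","No","Yes","No","Yes","No","Yes"],
    "Pat":   ["Some","Full","Some","Full","Full","Some","None","Some","Full","Full","None","Full"],
    "Price": ["$$$","$","$","$","$$$","$$","$","$$","$","$$$","$","$"],
    "Rain":  ["No","No","No","No","No","Yes","Yes","Yes","Yes","No","No","No"],
    "Res":   ["Yes","No","No","No","Yes","Yes","No","Yes","No","Yes","No","No"],
    "Type":  ["French","Thai","Burger","Thai","French","Italian","Burger","Thai","Burger","Italian","Thai","Burger"],
    "Est":   ["0-10","30-60","0-10","10-30",">60","0-10","0-10","0-10",">60","10-30","0-10","30-60"],
    "Wait":  ["Yes","No","Yes","Yes","No","Yes","No","Yes","No","No","No","Yes"],
}
df = pd.DataFrame(data)

# Label-encode every column, as in the cell above
for c in df.columns:
    df[c] = preprocessing.LabelEncoder().fit_transform(df[c])

# X is every feature except the target column "Wait"
X = df.drop(columns="Wait")
y = df["Wait"]

clf = DecisionTreeClassifier(random_state=0).fit(X, y)

# Print a text rendering of the tree, then the training accuracy
print(export_text(clf, feature_names=list(X.columns)))
print("Training accuracy:", accuracy_score(y, clf.predict(X)))
```

Since we train and evaluate on the same 12 rows and no two rows have identical features with different labels, the tree can grow until every leaf is pure, so the training accuracy is 1.0.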
Because of the required encoding, and renaming of features, it's not easy to interpret this tree and compare it to the ones we made in class. Nevertheless, it's a good little example of how to make a decision tree in scikit-learn. In a later notebook we'll look at a tree made using the algorithm from class.
Part 2: The entropy of English¶
Install nltk (the Natural Language Toolkit) using the command below.
!pip install nltk
The next cell will open an interactive window (which is a bit weird). Follow the prompts to download a corpus called 'brown'.
import nltk
## delete the next line after you download "brown" (or comment it)
nltk.download()
`brown.words()` returns the corpus as a list of words.
from nltk.corpus import brown
brown.words()
len(brown.words())
Your job is to use these words to compute, using standard Python, the per-character entropy of the English language. Only consider 27 characters: the alphabet plus space.
# your code!
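One way to approach this is sketched below. The helper name `char_entropy`, the choice to insert one space after each word, and the toy sanity check are all our own assumptions, not part of the assignment; you may make different modeling choices (e.g. how to handle punctuation-only tokens).

```python
from collections import Counter
from math import log2

def char_entropy(words):
    """First-order entropy in bits per character over 27 symbols: a-z plus space."""
    counts = Counter()
    for word in words:
        # Keep only alphabetic characters, lowercased
        letters = [ch for ch in word.lower() if "a" <= ch <= "z"]
        counts.update(letters)
        if letters:               # skip punctuation-only "words"
            counts[" "] += 1      # model the space separating consecutive words
    total = sum(counts.values())
    # H = -sum p * log2(p) over the observed symbol frequencies
    return -sum((n / total) * log2(n / total) for n in counts.values())

# Sanity check on a toy input: 'a', 'b', and space each occur twice,
# so the distribution is uniform over 3 symbols
print(char_entropy(["ab", "ab"]))  # log2(3) ≈ 1.585

# On the Brown corpus (uncomment after downloading it):
# from nltk.corpus import brown
# print(char_entropy(brown.words()))
```

On real English text this first-order estimate typically comes out to roughly 4 bits per character; Shannon's estimates of English entropy are the classic point of comparison.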