import numpy as np
import pandas as pd

# Load the adult census data from disk.
adult_census = pd.read_csv("../data/adult-census.csv")

# Name of the column used as the target.
target_col = "class"

# Drop the target first so it cannot end up among the features, then keep
# only the names of the numeric columns.
raw_features = adult_census.drop(columns=target_col)
numeric_features = raw_features.select_dtypes(np.number)
feature_cols = numeric_features.columns.values

# Split the table into the numeric feature columns and the target column.
features = adult_census[feature_cols]
target = adult_census[target_col]


def get_features_and_target(csv_file="../data/adult-census.csv", target_col="class"):
    '''Split a CSV into a DF of numeric features and a target column.

    Parameters
    ----------
    csv_file : str, path object or file-like object
        Anything accepted by ``pandas.read_csv``. Defaults to the adult
        census file, so a call with no arguments behaves as it did before.
    target_col : str
        Name of the column returned as the target. Defaults to ``"class"``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.Series)
        The numeric feature columns (target excluded) and the target column.

    Raises
    ------
    KeyError
        If ``target_col`` is not a column of the CSV.
    '''
    data = pd.read_csv(csv_file)

    # Remove the target before selecting by dtype so that a numeric target
    # is never returned as one of the features.
    raw_features = data.drop(columns=target_col)
    features = raw_features.select_dtypes(np.number)
    target = data[target_col]

    return (features, target)


# Call the function and unpack the returned (features, target) pair.
f, t = get_features_and_target()
# Preview the first rows of the features.
f.head()


# Preview the first rows of the target.
t.head()

0     <=50K
1     <=50K
2      >50K
3      >50K
4     <=50K
Name: class, dtype: object


def get_features_and_target():
    '''Load the adult census CSV and return (numeric features, "class" column).'''
    census = pd.read_csv("../data/adult-census.csv")
    label_name = "class"

    # Names of every numeric column once the label has been removed.
    numeric_names = (
        census
        .drop(columns=label_name)
        .select_dtypes(np.number)
        .columns
        .values
    )

    return (census[numeric_names], census[label_name])


def get_features_and_target():
    '''Read the adult census CSV; return its numeric columns and its "class" column.'''
    csv_path = "../data/adult-census.csv"
    label = "class"

    table = pd.read_csv(csv_path)

    # Select by dtype on a copy without the label, then index the full
    # table by the surviving column names.
    without_label = table.drop(columns=label)
    keep = without_label.select_dtypes(np.number).columns.values

    X = table[keep]
    y = table[label]
    return X, y


def get_features_and_target(csv_file, target_col):
    '''Read ``csv_file`` and return (numeric feature columns, ``target_col`` column).

    ``csv_file`` is handed straight to ``pd.read_csv``. ``target_col`` is
    dropped before the numeric columns are selected, so it is never part
    of the returned features.
    '''
    frame = pd.read_csv(csv_file)

    numeric_names = (
        frame.drop(columns=target_col)
        .select_dtypes(np.number)
        .columns.values
    )

    feature_frame = frame[numeric_names]
    target_series = frame[target_col]
    return (feature_frame, target_series)


# The function now has two required parameters, so calling it with no
# arguments raises a TypeError.
f, t = get_features_and_target()

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
/var/folders/j3/v1318ng94fvdpq7kzr0hq9kw0000gn/T/ipykernel_2533/3218846325.py in <module>
----> 1 f, t = get_features_and_target()

TypeError: get_features_and_target() missing 2 required positional arguments: 'csv_file' and 'target_col'


# Inside parentheses Python ignores line breaks and indentation, so each
# keyword argument can go on its own line.
f, t = get_features_and_target(
    csv_file='../data/adult-census.csv',
    target_col='class',
)


# Preview the first rows of the census features.
f.head()


# Reuse the same function on a different CSV with a different target column.
ames_features, ames_target = get_features_and_target(
    csv_file='../data/ames.csv',
    target_col='Sale_Price',
)


# Preview the first rows of the Ames features.
ames_features.head()


import my_module


my_module.get_features_and_target?

Signature: my_module.get_features_and_target(csv_file, target_col)
Docstring: Split a CSV into a DF of numeric features and a target column.
File:      ~/Teaching/advanced-python-datasci/notebooks/my_module.py
Type:      function


# Same call as before, now through the function imported from my_module.
f, t = my_module.get_features_and_target(
    csv_file='../data/adult-census.csv',
    target_col='class',
)


# Preview the first rows of the features.
f.head()

	age	education-num	capital-gain	hours-per-week
0	25	7	0	40
1	38	9	0	50
2	28	12	0	40
3	44	10	7688	40
4	18	10	0	30

	age	education-num	capital-gain	hours-per-week
0	25	7	0	40
1	38	9	0	50
2	28	12	0	40
3	44	10	7688	40
4	18	10	0	30

	Lot_Frontage	Lot_Area	Year_Built	Year_Remod_Add	Mas_Vnr_Area	BsmtFin_SF_1	BsmtFin_SF_2	Bsmt_Unf_SF	Total_Bsmt_SF	First_Flr_SF	...	Open_Porch_SF	Screen_Porch	Misc_Val	Mo_Sold	Year_Sold	Longitude	Latitude
0	141	31770	1960	1960	112	2	0	441	1080	1656	...	62	0	0	5	2010	-93.619754	42.054035
1	80	11622	1961	1961	0	6	144	270	882	896	...	0	120	0	6	2010	-93.619756	42.053014
2	81	14267	1958	1958	108	1	0	406	1329	1329	...	36	0	12500	6	2010	-93.619387	42.052659
3	93	11160	1968	1968	0	1	0	1045	2110	2110	...	0	0	0	4	2010	-93.617320	42.051245
4	74	13830	1997	1998	0	3	0	137	928	928	...	34	0	0	3	2010	-93.638933	42.060899

	age	education-num	capital-gain	hours-per-week
0	25	7	0	40
1	38	9	0	50
2	28	12	0	40
3	44	10	7688	40
4	18	10	0	30

Modular Code

What's Modularity?

How do we achieve modularity in Python?

Example: Functions

Parametrizing Functions

Example: Files

Committing to GitHub

Questions