Testing¶

We've slowly been moving parts of our code into files outside of notebooks... but why?

  • Reuse – we could copy my_module.py and use it in another project if we needed the functions it contains.
  • Testability – one nice aspect of free-standing Python scripts is that we can write tests for them, checking that the functions inside are reliable and bug-free.

The Value of Testing¶

  • By running your code on example inputs (for which you know the right output), you can be more confident that it will do what you expect

  • Since you may reuse code in other projects, it's smart to test on not just the data for the current project, but any inputs that your code might reasonably have to deal with.

At this point, our directory setup is going to become very important, so let's take a quick detour to talk about it.

I'm going to be working with a project that looks something like this:

advanced-python-datasci/
├── data/
│   ├── adult-census.csv
│   ├── ames.csv
│   ├── ames_raw.csv
│   └── planes.csv
└── notebooks/
    ├── 01-git.ipynb
    ├── 02-explore_data.ipynb
    ├── 03-first_model.ipynb
    ├── 04-modular_code.ipynb
    ├── 05-feat_eng.ipynb
    ├── 06-model_eval.ipynb
    ├── 07-modularity-pt2.ipynb
    ├── 08-testing.ipynb
    ├── 09-ml_lifecycle_mgt.ipynb
    └── my_module.py

What's important:

  • At the top level, we have folders for data and notebooks
  • my_module.py is in our notebooks folder

Take a few minutes to make sure your project repository is organized similarly. This will make a big difference in this section!


A Minimal Test¶

  • The easiest way to write a test is in a fresh Python script

  • Now that our project is organized, we can just create a new file in the notebooks/ folder, called tests.py

    • Remember that we can do this in Jupyter with File > New > Text File



  • Be sure this file appears in your notebooks folder!
    • It's very important that it's in the same place as my_module.py

Add the following code to your script:

In [9]:
import my_module

def test_invocation():
    features, target = my_module.get_features_and_target(
        csv_file='../data/adult-census.csv',
        target_col='class'
    )

Discussion

If we were to run this code on its own with python tests.py, what would happen?

Running Our Test¶

  • We're going to invoke our test with pytest, a tool we'll discuss more shortly
  • Open a terminal session (in Jupyter, File > New > Terminal)
    • Things in the terminal work a bit differently on Windows than on Mac/Linux, so we'll help where we can...

Run the command below in a notebook to find out what folder you're currently working in:

In [11]:
import os
os.getcwd()
Out[11]:
'/Users/eswan18/Teaching/advanced-python-datasci/notebooks'

Copy that result (including the quotes) and in your terminal, paste it after the cd command.

So in my terminal, I would run:

cd '/Users/eswan18/Teaching/advanced-python-datasci/notebooks'

Now...

  • Windows users: run dir
  • Mac/Linux users: run ls

This lists the contents of the folder you're currently inside. You should see my_module.py and tests.py among the output.


Now, we're almost ready to run our test. The only thing left is to set up our terminal so that it's using the same Conda environment as our notebooks -- because pytest is installed in that environment.

  • If you took the intermediate class with us, we discussed Conda and environments in more detail then.

In the terminal, run

conda activate uc-python

This should add a "uc-python" prefix to your terminal prompt.

Note that your prompt will look quite a bit different from mine; all that matters is the folder name and the "uc-python" prefix.

Now we're ready to run our test!

In your terminal, type:

pytest tests.py

You should see some output appear. The last line should look something like:

============ 1 passed in 0.74s ============

This means that one test passed, zero tests failed, and the whole run took 0.74 seconds.
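
If you want to see each test listed by name, you can add pytest's -v ("verbose") flag:

pytest tests.py -v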

Pytest¶

  • Pytest is an automated tool for running sets of tests
    • sets of tests are often called test "suites"
  • It expects your tests to be in their own files, and each test needs to be a function
    • The name of each function must start with test_, so pytest knows it's a test and not just a regular function.
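
For instance, if tests.py contained the two functions below, pytest would collect and run only the first one (a sketch, just to illustrate the naming rule):

def test_addition():
    # Collected and run: the name starts with test_.
    assert 1 + 1 == 2

def load_helper():
    # Ignored by pytest: the name doesn't start with test_.
    return 42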

Let's look back at our simple test:

In [ ]:
import my_module

def test_invocation():
    features, target = my_module.get_features_and_target(
        csv_file='../data/adult-census.csv',
        target_col='class'
    )

Note that our function's name starts with test_, so pytest is smart enough to run it.

What happens if a test fails? Let's add a bad test just to see.

Add this function to tests.py, below test_invocation.

In [12]:
def test_without_args():
    # A test we know will fail because we don't provide arguments
    # to the function.
    features, target = my_module.get_features_and_target()

Save the file, and then rerun pytest tests.py in your terminal.

============== 1 failed, 1 passed in 0.83s ==============

Our original test still passes, but this one fails!

Above that line, pytest reports exactly what caused the failure. We got an error:

def test_without_args():
        # A test we know will fail because we don't provide arguments
        # to the function.
>       features, target = my_module.get_features_and_target()

E       TypeError: get_features_and_target() missing 2 required positional arguments: 'csv_file' and 'target_col'

tests.py:12: TypeError

What does it mean to "fail"?¶

  • If a test function encounters any kind of unexpected error, that counts as a failure to pytest
  • Any test that runs without error "passes"
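
In other words, even a test with no asserts at all "passes" as long as nothing inside it raises an error -- which is why our test_invocation counts as a pass:

def test_does_nothing():
    # No error is raised, so pytest counts this as a pass.
    pass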

Let's remove our test_without_args test -- it's not something we actually want to verify about our code.

However, one thing we do want to check is that the features and target that are returned from our function are a pandas DataFrame and Series, respectively. Let's add a test for that in its place...

In [14]:
import pandas as pd # You may want to move this import to the top of the file.

def test_return_types():
    features, target = my_module.get_features_and_target(
        csv_file='../data/adult-census.csv',
        target_col='class'
    )
    assert isinstance(features, pd.DataFrame)
    assert isinstance(target, pd.Series)

Then we can rerun pytest tests.py:

=========== 2 passed in 0.88s ===========

Nice! It looks like our function does indeed return a DataFrame and a Series.

Assert¶

  • We used the assert keyword to check that features was a DataFrame
  • assert is a special Python feature that raises an error if the expression after it isn't True.
In [18]:
assert 3 - 2 == 1
In [19]:
assert 100 > 50
In [20]:
assert 4 * 3 == 11
---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
/var/folders/j3/v1318ng94fvdpq7kzr0hq9kw0000gn/T/ipykernel_72985/55912000.py in <module>
----> 1 assert 4 * 3 == 11

AssertionError: 

One common use of assert in tests is to check that a variable contains a certain kind of object:

In [21]:
x = 5
assert isinstance(x, int)
In [22]:
assert isinstance(x, str)
---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
/var/folders/j3/v1318ng94fvdpq7kzr0hq9kw0000gn/T/ipykernel_72985/3799747894.py in <module>
----> 1 assert isinstance(x, str)

AssertionError: 

But you can use assert to check any expression in Python that evaluates True or False.
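
One handy variation: you can attach a message to an assert, and Python will include it in the AssertionError if the check fails:

x = 5
assert x > 0, 'x must be positive'          # passes silently
assert x > 10, 'x must be greater than 10'  # fails, showing the message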

What would happen if we had expected target to be a list instead?

In [23]:
def test_return_types():
    features, target = my_module.get_features_and_target(
        csv_file='../data/adult-census.csv',
        target_col='class'
    )
    assert isinstance(features, pd.DataFrame)
    assert isinstance(target, list)

The second assert would fail, since target is a pandas Series rather than a list; pytest would report an AssertionError and count the test as failed.

These kinds of tests are handy, because we can make sure our functions return the types of things we expect.

Discussion

What other aspects of get_features_and_target might we want to test?

test_cols_make_sense¶

  • If get_features_and_target works as we expect, the target should be a column in the DataFrame, as should each of the columns in features.
  • Here's a test to check that.
    • There's some pandas functionality in here that we haven't discussed yet, but it should be clear what's happening.
  • Add this test to tests.py at the bottom. Make sure you're importing pandas somewhere in the file!
In [42]:
def test_cols_make_sense():
    features, target = my_module.get_features_and_target(
        csv_file='../data/adult-census.csv',
        target_col='class'
    )
    # Load the data ourselves so we can double-check the columns
    df = pd.read_csv('../data/adult-census.csv')
    assert target.name in df.columns
    # Use a list comprehension to check all the feature columns
    assert all([feature_col in df.columns for feature_col in features])

Save the file and rerun pytest tests.py:

============= 3 passed in 0.97s =============

Whoo! get_features_and_target looks pretty reliable; I feel more comfortable using it across modeling projects to load data and split it into a target and a DataFrame of numeric features.

If I were planning to use it more, though, there are some other things I might think about testing:

  • The number of elements in target should match the number of rows in the original data, as should the number of rows in features.
  • All of the columns in features should be numeric.
  • All numeric columns in the input file should be present either in features or target
  • The name of the target series should match the target_col argument that we passed into the function.

But for the sake of time, we're going to stop at these three tests for the function.
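
As a sketch, the first of those checks might look like this hypothetical test_lengths_match (assuming get_features_and_target doesn't drop any rows):

def test_lengths_match():
    features, target = my_module.get_features_and_target(
        csv_file='../data/adult-census.csv',
        target_col='class'
    )
    # Load the data ourselves to compare row counts.
    df = pd.read_csv('../data/adult-census.csv')
    assert len(target) == len(df)
    assert len(features) == len(df)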

Parametrization¶

  • Another way we could check the robustness of get_features_and_target is by making sure that it works on multiple data sets, not just the adult census data.
  • We could write entirely new tests for this...
In [46]:
def test_return_types_census():
    features, target = my_module.get_features_and_target(
        csv_file='../data/adult-census.csv',
        target_col='class'
    )
    assert isinstance(features, pd.DataFrame)
    assert isinstance(target, pd.Series)
    
def test_return_types_ames():
    features, target = my_module.get_features_and_target(
        csv_file='../data/ames.csv',
        target_col='Sale_Price'
    )
    assert isinstance(features, pd.DataFrame)
    assert isinstance(target, pd.Series)

But this is very duplicative. There has to be a better way!

Indeed, there is: it's parametrization.

To parametrize a test is to give it multiple inputs.

In [47]:
import pytest

@pytest.mark.parametrize(
    'csv,target_col',
    [
        ('../data/adult-census.csv', 'class'),
        ('../data/ames.csv', 'Sale_Price')
    ]
)
def test_return_types(csv, target_col):
    features, target = my_module.get_features_and_target(
        csv_file=csv,
        target_col=target_col
    )
    assert isinstance(features, pd.DataFrame)
    assert isinstance(target, pd.Series)

Notice that our inputs are parameters to our test function. @pytest.mark.parametrize takes a list of tuples to be passed to our test, along with a string naming the arguments they correspond to ('csv,target_col').

This syntax may look complicated initially, but it's easy enough to copy, paste, and modify for each test you want to parametrize.

=========== 4 passed in 0.93s ==========

This test should pass, and notice that we went from having 3 tests to 4 -- because one of them is now run twice, once for each set of parameters.

For the sake of time, we're not going to parametrize all of our tests, but in a larger, production-grade test suite, that would be worth doing.
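
For reference, a parametrized test_cols_make_sense would reuse the same decorator with the test body unchanged -- something like:

@pytest.mark.parametrize(
    'csv,target_col',
    [
        ('../data/adult-census.csv', 'class'),
        ('../data/ames.csv', 'Sale_Price')
    ]
)
def test_cols_make_sense(csv, target_col):
    features, target = my_module.get_features_and_target(
        csv_file=csv,
        target_col=target_col
    )
    # Load the data ourselves so we can double-check the columns
    df = pd.read_csv(csv)
    assert target.name in df.columns
    assert all([feature_col in df.columns for feature_col in features])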

Expecting Exceptions¶

  • One mark of good code is that it emits sensible errors

  • For example, if we pass in the wrong kinds of objects to our function, it should give us an error that points us in the right direction...

In [51]:
features, target = my_module.get_features_and_target(
    csv_file=['../data/ames.csv'], # notice that we're passing a list here
    target_col='Sale_Price'
)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
/var/folders/j3/v1318ng94fvdpq7kzr0hq9kw0000gn/T/ipykernel_72985/3089255148.py in <module>
----> 1 features, target = my_module.get_features_and_target(
      2     csv_file=['../data/ames.csv'], # notice that we're passing a list here
      3     target_col='Sale_Price'
      4 )

~/Teaching/advanced-python-datasci/notebooks/my_module.py in get_features_and_target(csv_file, target_col)
      7     '''Split a CSV into a DF of numeric features and a target column.'''
      8 
----> 9     adult_census = pd.read_csv(csv_file)
     10 
     11     raw_features = adult_census.drop(columns=target_col)

~/anaconda3/envs/uc-python/lib/python3.9/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
    309                     stacklevel=stacklevel,
    310                 )
--> 311             return func(*args, **kwargs)
    312 
    313         return wrapper

~/anaconda3/envs/uc-python/lib/python3.9/site-packages/pandas/io/parsers/readers.py in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
    584     kwds.update(kwds_defaults)
    585 
--> 586     return _read(filepath_or_buffer, kwds)
    587 
    588 

~/anaconda3/envs/uc-python/lib/python3.9/site-packages/pandas/io/parsers/readers.py in _read(filepath_or_buffer, kwds)
    480 
    481     # Create the parser.
--> 482     parser = TextFileReader(filepath_or_buffer, **kwds)
    483 
    484     if chunksize or iterator:

~/anaconda3/envs/uc-python/lib/python3.9/site-packages/pandas/io/parsers/readers.py in __init__(self, f, engine, **kwds)
    809             self.options["has_index_names"] = kwds["has_index_names"]
    810 
--> 811         self._engine = self._make_engine(self.engine)
    812 
    813     def close(self):

~/anaconda3/envs/uc-python/lib/python3.9/site-packages/pandas/io/parsers/readers.py in _make_engine(self, engine)
   1038             )
   1039         # error: Too many arguments for "ParserBase"
-> 1040         return mapping[engine](self.f, **self.options)  # type: ignore[call-arg]
   1041 
   1042     def _failover_to_python(self):

~/anaconda3/envs/uc-python/lib/python3.9/site-packages/pandas/io/parsers/c_parser_wrapper.py in __init__(self, src, **kwds)
     49 
     50         # open handles
---> 51         self._open_handles(src, kwds)
     52         assert self.handles is not None
     53 

~/anaconda3/envs/uc-python/lib/python3.9/site-packages/pandas/io/parsers/base_parser.py in _open_handles(self, src, kwds)
    220         Let the readers open IOHandles after they are done with their potential raises.
    221         """
--> 222         self.handles = get_handle(
    223             src,
    224             "r",

~/anaconda3/envs/uc-python/lib/python3.9/site-packages/pandas/io/common.py in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
    607 
    608     # open URLs
--> 609     ioargs = _get_filepath_or_buffer(
    610         path_or_buf,
    611         encoding=encoding,

~/anaconda3/envs/uc-python/lib/python3.9/site-packages/pandas/io/common.py in _get_filepath_or_buffer(filepath_or_buffer, encoding, compression, mode, storage_options)
    394     if not is_file_like(filepath_or_buffer):
    395         msg = f"Invalid file path or buffer object type: {type(filepath_or_buffer)}"
--> 396         raise ValueError(msg)
    397 
    398     return IOArgs(

ValueError: Invalid file path or buffer object type: <class 'list'>

That error is admittedly long, but it is informative:

ValueError: Invalid file path or buffer object type: <class 'list'>

It might be wise to have a test that makes sure our function raises a ValueError when given something that isn't a valid CSV file path.

In [53]:
@pytest.mark.parametrize(
    'csv', [ ['a', 'b', 'c'], 123 ]
)
def test_bad_input_error(csv):
    with pytest.raises(ValueError):
        features, target = my_module.get_features_and_target(
            csv_file=csv,
            target_col='Sale_Price'
        )

Here, we're testing two types of bad inputs: a list and an integer. Then we're using with pytest.raises(): -- a context manager -- to make sure the code block inside of it raises the error we expect.
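
If you want to be stricter about which ValueError is acceptable, pytest.raises also takes a match argument -- a regular expression checked against the error message. A sketch, using the pandas message we saw above:

def test_bad_input_error_message():
    with pytest.raises(ValueError, match='Invalid file path'):
        my_module.get_features_and_target(
            csv_file=['a', 'b', 'c'],
            target_col='Sale_Price'
        )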

Raising good exceptions may seem unimportant at first glance, but an error is much better than bad input passing silently! That could lead to your project results being invalid. Maybe the output metrics are incorrect, or the model was trained on unreliable data.

If something is wrong, you want your code to error as early as possible.

Aside: Raising Your Own Exceptions¶

  • We don't have time today to go into raising exceptions, but it's possible to issue errors from your own code using raise

  • You might see something like:

if x < 0:
    raise RuntimeError('Invalid value for x')

  • This will halt the code in its tracks, propagating a RuntimeError, if x is less than 0.
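
As a minimal sketch of how raising and testing fit together (avg is an invented example, not part of my_module):

def avg(values):
    '''Average a list of numbers, erroring early on empty input.'''
    if len(values) == 0:
        raise ValueError('values must not be empty')
    return sum(values) / len(values)

def test_avg_empty_input():
    # The function should refuse an empty list with a ValueError.
    with pytest.raises(ValueError):
        avg([])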

Good Tests¶

  • We've written quite a few tests, but what makes a good test? What's worth checking?
  • Situations
    • A very standard set of inputs
    • Sets of inputs that are most likely to behave differently than others
      • e.g. datasets with lots of NaNs, empty strings, file paths that don't exist
  • Resulting actions
    • Returning valid results
    • Raising sensible errors

And various permutations of the above -- for each function you write.
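
For example, a "file path that doesn't exist" check could be sketched like this (assuming, as the traceback earlier suggests, that our function calls pd.read_csv directly, which raises FileNotFoundError for a missing local path):

def test_missing_file_error():
    with pytest.raises(FileNotFoundError):
        my_module.get_features_and_target(
            csv_file='../data/does-not-exist.csv',
            target_col='class'
        )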

Your Turn

Add a test for the make_preprocessor function to tests.py. It can be very simple. Rerun your tests.
Then parametrize it and run the tests again.

Wrapping Up¶

  • In large projects, there are usually multiple files full of tests.
    • Standard practice is to name them all test_<something>.py and keep them in a tests/ folder.
    • People have different methods of organizing their tests, but one test file per function of your code is a good starting point.
  • pytest is extremely powerful and we touched on just a few of its features. I recommend Python Testing with Pytest if you want to learn more.
  • Learning to write useful tests for your code is a journey. Creating tests is work, and you have to measure the value against the effort required.
    • As a general rule, production code justifies many more tests than ad hoc analysis projects. Shared code libraries need even more.
  • You'll hear about various types of tests: unit, integration, smoke, and more.
    • The only one to know right now is unit testing: tests for small chunks, or units, of your code. That's often the simplest starting point.

Your Turn

Add, commit, and push your project updates.

Questions¶

Are there any questions before we move on?