feature: add workshop 3 template
This commit is contained in:
@@ -0,0 +1,30 @@
|
||||
"""
|
||||
Workshop 3 Feature Engineering Pipeline für das Melbourne Housing Dataset.
|
||||
|
||||
Wendet die in Workshop 2 erarbeiteten Empfehlungen (WS_03_Empfehlungen.xlsx)
|
||||
auf melb_data.csv an und schreibt melb_data_prep.csv.
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
RAW = "data/melb_data.csv"
|
||||
OUT = "data/melb_data_prep.csv"
|
||||
|
||||
|
||||
def load(path: str = RAW) -> pd.DataFrame:
    """Read the CSV file at ``path`` into a DataFrame.

    Args:
        path: Location of the CSV file; defaults to the module constant ``RAW``.

    Returns:
        The DataFrame produced by ``pd.read_csv`` with its default settings.
    """
    frame = pd.read_csv(path)
    return frame
|
||||
|
||||
|
||||
def inspect(df: pd.DataFrame) -> None:
    """Print quick diagnostics to verify the assumptions made about the data.

    Diagnostic helper; per the original note it is not meant to be called
    from the pipeline itself.

    Args:
        df: Frame to inspect. Must contain the columns ``Price`` and
            ``YearBuilt``.

    Returns:
        None. All output goes to standard output.

    Raises:
        KeyError: If ``Price`` or ``YearBuilt`` is missing from ``df``.
    """
    print(df.shape)
    # DataFrame.info() writes its report itself and returns None, so wrapping
    # it in print() only added a stray "None" line to the output.
    df.info()
    na_counts = df.isna().sum()  # computed once instead of twice
    print(na_counts[na_counts > 0])  # NA count per column (columns with NAs only)
    print(df["Price"].sort_values().tail(5))  # price outliers?
    print(df["YearBuilt"].sort_values().head(5))  # the suspicious 1196
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: load the data from the default path and print
    # the diagnostics for it.
    inspect(load())
|
||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user