import pandas as pd
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
import xgboost as xgb
# List every file found under the Kaggle input directory, one full path per line.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
# Load the competition files: the submission template plus the train and test tables.
submission = pd.read_csv('/kaggle/input/playground-series-s3e10/sample_submission.csv')
train = pd.read_csv('/kaggle/input/playground-series-s3e10/train.csv')
test = pd.read_csv('/kaggle/input/playground-series-s3e10/test.csv')

# Quick look at the training data. describe() only *returns* a summary frame,
# so it is printed explicitly; info() writes its report to stdout by itself.
print(train.describe())
train.info()

# Features are selected by position (columns 1 through 8); the target is 'Class'.
# NOTE(review): presumably column 0 is a row identifier -- confirm against the CSV header.
X = train.iloc[:, 1:9]
y = train['Class']

# Hold out a validation split using the default proportions. The fixed seed
# keeps the split (and therefore the fitted model) reproducible between runs.
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=42)

# Same positional slice as for the training features.
# NOTE(review): assumes test.csv has the same column layout as train.csv -- confirm.
X_test = test.iloc[:, 1:9]
# Disabled hyper-parameter search, kept for reference: a grid over max_depth
# and n_estimators.
# NOTE(review): the values in `para` below lie on this grid and were presumably
# taken from reg_cv.best_params_ -- confirm.
#reg = xgb.XGBRegressor()
#reg_cv = GridSearchCV(reg, {'max_depth': [2,4,6], 'n_estimators': [25,50,100]}, verbose=1)
#reg_cv.fit(X_train, y_train)
#reg_cv.best_params_

# Fixed hyper-parameters for the model that is actually trained.
para = dict(max_depth=4, n_estimators=50)
reg = xgb.XGBRegressor(**para)
reg.fit(X_train, y_train)

# Predict on the training split, the validation split and the test features,
# in that order.
y_pred, y_val_pred, y_test = (
    reg.predict(features) for features in (X_train, X_val, X_test)
)
# Clamp every prediction array to the [0, 1] interval, modifying it in place:
# values below 0 become 0 and values above 1 become 1.
for preds in (y_pred, y_val_pred, y_test):
    preds[preds < 0] = 0
    preds[preds > 1] = 1
# Put the clamped test predictions into the 'Class' column of the submission
# template and write it out without the index column.
submission = submission.assign(Class=y_test)
submission.to_csv("submission.csv", index=False)