我正在研究2019年数据科学碗。当我使用熊猫读取数据时,培训和测试数据需要很长时间,我想减少时间以便机器可以有效地运行分析。
import numpy as np # linear algebra
import pandas as pd # data processing,CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
import plotly as py
import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from plotly.offline import download_plotlyjs,init_notebook_mode,plot,iplot
init_notebook_mode(connected=True)
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
keep_cols = ['event_id','game_session','installation_id','event_count','event_code','title','game_time','type','world']
specs_df = pd.read_csv('/kaggle/input/data-science-bowl-2019/specs.csv')
train_df = pd.read_csv('/kaggle/input/data-science-bowl-2019/train.csv',usecols=keep_cols)
test_df = pd.read_csv('/kaggle/input/data-science-bowl-2019/test.csv')
train_labels_df = pd.read_csv('/kaggle/input/data-science-bowl-2019/train_labels.csv')