import time
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.linear_model import Ridge
from sklearn.pipeline import FeatureUnion
# In [3]:
# Load the train and test splits; both files are parsed with a tab delimiter
# even though they carry a .csv extension.
train_data = pd.read_csv(
    "../data/4/train.csv",
    sep="\t",
)
test_data = pd.read_csv(
    "../data/4/test.csv",
    sep="\t",
)
# Disabled experiment, kept for reference: stacks five copies of a frame
# row-wise (axis=0).
# train_data = pd.concat([train_data_1, train_data_1, train_data_1, train_data_1, train_data_1], axis=0)
# pre_data = pd.concat([pre_data_1, pre_data_1, pre_data_1, pre_data_1, pre_data_1], axis=0)
# In [5]:
# Print a summary of the training frame: index range, column names,
# non-null counts and dtypes.
train_data.info()
# Column glossary:
# train_id – training row number; name – product name
# item_condition_id – current condition of the item; brand_name – brand name
# shipping – whether shipping is free; item_description – product description
# category_name – product category; price – product price
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 474710 entries, 0 to 474709
Data columns (total 8 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 train_id 474710 non-null int64
数据挖掘目标
最新推荐文章于 2024-07-17 16:32:20 发布