最近发现Python DataTable有查询csv, Excel表数据真的很好用。强烈推荐给大家。有一些甚至比Pandas都要好用一些。这篇文章记录一些查询数据的方法。其中一些小的细节,请大家自己观察结果的返回。
读取csv数据:
tips = dt.fread("D:\\pythonProject\\datacenterqa\\datatables_example\\tips.csv")
1. "SELECT * FROM tips"
dt1 = tips
print(dt1)
结果:
| total_bill tip sex smoker day time size
| float64 float64 str32 str32 str32 str32 int32
--- + ---------- ------- ------ ------ ----- ------ -----
0 | 16.99 1.01 Female No Sun Dinner 2
1 | 10.34 1.66 Male No Sun Dinner 3
2 | 21.01 3.5 Male No Sun Dinner 3
3 | 23.68 3.31 Male No Sun Dinner 2
4 | 24.59 3.61 Female No Sun Dinner 4
5 | 25.29 4.71 Male No Sun Dinner 4
6 | 8.77 2 Male No Sun Dinner 2
7 | 26.88 3.12 Male No Sun Dinner 4
8 | 15.04 1.96 Male No Sun Dinner 2
9 | 14.78 3.23 Male No Sun Dinner 2
10 | 10.27 1.71 Male No Sun Dinner 2
11 | 35.26 5 Female No Sun Dinner 4
12 | 15.42 1.57 Male No Sun Dinner 2
13 | 18.43 3 Male No Sun Dinner 4
14 | 14.83 3.02 Female No Sun Dinner 2
… | … … … … … … …
239 | 29.03 5.92 Male No Sat Dinner 3
240 | 27.18 2 Female Yes Sat Dinner 2
241 | 22.67 2 Male Yes Sat Dinner 2
242 | 17.82 1.75 Male No Sat Dinner 2
243 | 18.78 3 Female No Thur Dinner 2
[244 rows x 7 columns]
2. "SELECT * FROM tips limit 5"
dt2= tips[:5,:]
print(dt2)
结果:
| total_bill tip sex smoker day time size
| float64 float64 str32 str32 str32 str32 int32
-- + ---------- ------- ------ ------ ----- ------ -----
0 | 16.99 1.01 Female No Sun Dinner 2
1 | 10.34 1.66 Male No Sun Dinner 3
2 | 21.01 3.5 Male No Sun Dinner 3
3 | 23.68 3.31 Male No Sun Dinner 2
4 | 24.59 3.61 Female No Sun Dinner 4
[5 rows x 7 columns]
3. "SELECT total_bill, tip, sex, time FROM tips"
dt3 = tips[:, ['total_bill', 'tip', 'sex','time']]
print(dt3)
结果:
| total_bill tip sex time
| float64 float64 str32 str32
--- + ---------- ------- ------ ------
0 | 16.99 1.01 Female Dinner
1 | 10.34 1.66 Male Dinner
2 | 21.01 3.5 Male Dinner
3 | 23.68 3.31 Male Dinner
4 | 24.59 3.61 Female Dinner
5 | 25.29 4.71 Male Dinner
6 | 8.77 2 Male Dinner
7 | 26.88 3.12 Male Dinner
8 | 15.04 1.96 Male Dinner
9 | 14.78 3.23 Male Dinner
10 | 10.27 1.71 Male Dinner
11 | 35.26 5 Female Dinner
12 | 15.42 1.57 Male Dinner
13 | 18.43 3 Male Dinner
14 | 14.83 3.02 Female Dinner
… | … … … …
239 | 29.03 5.92 Male Dinner
240 | 27.18 2 Female Dinner
241 | 22.67 2 Male Dinner
242 | 17.82 1.75 Male Dinner
243 | 18.78 3 Female Dinner
[244 rows x 4 columns]
4. "SELECT total_bill, tip, sex, time FROM tips where sex = 'Female'"
dt4 = tips[dt.rowany(f["sex"]=="Female"), ("total_bill","tip","sex","time")]
print(dt4)
结果:
| total_bill tip sex time
| float64 float64 str32 str32
-- + ---------- ------- ------ ------
0 | 16.99 1.01 Female Dinner
1 | 24.59 3.61 Female Dinner
2 | 35.26 5 Female Dinner
3 | 14.83 3.02 Female Dinner
4 | 10.33 1.67 Female Dinner
5 | 16.97 3.5 Female Dinner
6 | 20.29 2.75 Female Dinner
7 | 15.77 2.23 Female Dinner
8 | 19.65 3 Female Dinner
9 | 15.06 3 Female Dinner
10 | 20.69 2.45 Female Dinner
11 | 16.93 3.07 Female Dinner
12 | 10.29 2.6 Female Dinner
13 | 34.81 5.2 Female Dinner
14 | 26.41 1.5 Female Dinner
… | … … … …
82 | 10.09 2 Female Lunch
83 | 22.12 2.88 Female Dinner
84 | 35.83 4.67 Female Dinner
85 | 27.18 2 Female Dinner
86 | 18.78 3 Female Dinner
[87 rows x 4 columns]
另一种实现:
dt5 = tips[dt.rowall(f["sex"] == "Female"), ("total_bill", "tip", "sex", "time")]
print(dt5)
结果:
| total_bill tip sex time
| float64 float64 str32 str32
-- + ---------- ------- ------ ------
0 | 16.99 1.01 Female Dinner
1 | 24.59 3.61 Female Dinner
2 | 35.26 5 Female Dinner
3 | 14.83 3.02 Female Dinner
4 | 10.33 1.67 Female Dinner
5 | 16.97 3.5 Female Dinner
6 | 20.29 2.75 Female Dinner
7 | 15.77 2.23 Female Dinner
8 | 19.65 3 Female Dinner
9 | 15.06 3 Female Dinner
10 | 20.69 2.45 Female Dinner
11 | 16.93 3.07 Female Dinner
12 | 10.29 2.6 Female Dinner
13 | 34.81 5.2 Female Dinner
14 | 26.41 1.5 Female Dinner
… | … … … …
82 | 10.09 2 Female Lunch
83 | 22.12 2.88 Female Dinner
84 | 35.83 4.67 Female Dinner
85 | 27.18 2 Female Dinner
86 | 18.78 3 Female Dinner
[87 rows x 4 columns]
5. "SELECT * FROM tips where sex = 'Female' and total_bill>20"
dt6 = tips[(f["sex"] == "Female") & (f["total_bill"]>20), :]
print(dt6)
结果:
| total_bill tip sex smoker day time size
| float64 float64 str32 str32 str32 str32 int32
-- + ---------- ------- ------ ------ ----- ------ -----
0 | 24.59 3.61 Female No Sun Dinner 4
1 | 35.26 5 Female No Sun Dinner 4
2 | 20.29 2.75 Female No Sat Dinner 2
3 | 20.69 2.45 Female No Sat Dinner 4
4 | 34.81 5.2 Female No Sun Dinner 4
5 | 26.41 1.5 Female No Sat Dinner 2
6 | 26.86 3.14 Female Yes Sat Dinner 2
7 | 25.28 5 Female Yes Sat Dinner 2
8 | 34.83 5.17 Female No Thur Lunch 4
9 | 22.75 3.25 Female No Fri Dinner 2
10 | 44.3 2.5 Female Yes Sat Dinner 3
11 | 22.42 3.48 Female Yes Sat Dinner 2
12 | 20.92 4.08 Female No Sat Dinner 2
13 | 25.71 4 Female No Sun Dinner 3
14 | 24.08 2.92 Female No Thur Lunch 4
15 | 29.8 4.2 Female No Thur Lunch 6
16 | 20.27 2.83 Female No Thur Lunch 2
17 | 27.05 5 Female No Thur Lunch 6
18 | 29.85 5.14 Female No Sun Dinner 5
19 | 25 3.75 Female No Sun Dinner 4
20 | 20.9 3.5 Female Yes Sun Dinner 3
21 | 43.11 5 Female Yes Thur Lunch 4
22 | 28.17 6.5 Female Yes Sat Dinner 3
23 | 30.14 3.09 Female Yes Sat Dinner 4
24 | 22.12 2.88 Female Yes Sat Dinner 2
25 | 35.83 4.67 Female No Sat Dinner 3
26 | 27.18 2 Female Yes Sat Dinner 2
[27 rows x 7 columns]
6. "SELECT total_bill, tip, sex FROM tips where sex = 'Female' and total_bill>20"
dt7 = tips[(f["sex"] == "Female") & (f["total_bill"] > 20), ("total_bill", "tip", "sex")]
print(dt7)
结果:
| total_bill tip sex
| float64 float64 str32
-- + ---------- ------- ------
0 | 24.59 3.61 Female
1 | 35.26 5 Female
2 | 20.29 2.75 Female
3 | 20.69 2.45 Female
4 | 34.81 5.2 Female
5 | 26.41 1.5 Female
6 | 26.86 3.14 Female
7 | 25.28 5 Female
8 | 34.83 5.17 Female
9 | 22.75 3.25 Female
10 | 44.3 2.5 Female
11 | 22.42 3.48 Female
12 | 20.92 4.08 Female
13 | 25.71 4 Female
14 | 24.08 2.92 Female
15 | 29.8 4.2 Female
16 | 20.27 2.83 Female
17 | 27.05 5 Female
18 | 29.85 5.14 Female
19 | 25 3.75 Female
20 | 20.9 3.5 Female
21 | 43.11 5 Female
22 | 28.17 6.5 Female
23 | 30.14 3.09 Female
24 | 22.12 2.88 Female
25 | 35.83 4.67 Female
26 | 27.18 2 Female
[27 rows x 3 columns]
7."SELECT total_bill, tip, sex FROM tips where sex = 'Female' and total_bill>20 limit 5"
dt8 = tips[(f["sex"] == "Female") & (f["total_bill"] > 20), ("total_bill", "tip", "sex")].head(5)
print(dt8)
结果:
| total_bill tip sex
| float64 float64 str32
-- + ---------- ------- ------
0 | 24.59 3.61 Female
1 | 35.26 5 Female
2 | 20.29 2.75 Female
3 | 20.69 2.45 Female
4 | 34.81 5.2 Female
[5 rows x 3 columns]
8. "SELECT *, total_bill*2 AS total_bill_doubled FROM tips LIMIT 5;"
dt9 = tips[: 5,f[:].extend({"total_bill_doubled": f.total_bill * 2})]
print(dt9)
结果:
| total_bill tip sex smoker day time size total_bill_doubled
| float64 float64 str32 str32 str32 str32 int32 float64
-- + ---------- ------- ------ ------ ----- ------ ----- ------------------
0 | 16.99 1.01 Female No Sun Dinner 2 33.98
1 | 10.34 1.66 Male No Sun Dinner 3 20.68
2 | 21.01 3.5 Male No Sun Dinner 3 42.02
3 | 23.68 3.31 Male No Sun Dinner 2 47.36
4 | 24.59 3.61 Female No Sun Dinner 4 49.18
[5 rows x 8 columns]
9. "SELECT total_bill, tip,sex, total_bill*2 AS total_bill_doubled FROM tips;"
dt10 = tips[:,("total_bill", "tip", "sex")][:,f[:].extend({"total_bill_doubled": f.total_bill * 2})]
print(dt10)
结果:
| total_bill tip sex total_bill_doubled
| float64 float64 str32 float64
--- + ---------- ------- ------ ------------------
0 | 16.99 1.01 Female 33.98
1 | 10.34 1.66 Male 20.68
2 | 21.01 3.5 Male 42.02
3 | 23.68 3.31 Male 47.36
4 | 24.59 3.61 Female 49.18
5 | 25.29 4.71 Male 50.58
6 | 8.77 2 Male 17.54
7 | 26.88 3.12 Male 53.76
8 | 15.04 1.96 Male 30.08
9 | 14.78 3.23 Male 29.56
10 | 10.27 1.71 Male 20.54
11 | 35.26 5 Female 70.52
12 | 15.42 1.57 Male 30.84
13 | 18.43 3 Male 36.86
14 | 14.83 3.02 Female 29.66
… | … … … …
239 | 29.03 5.92 Male 58.06
240 | 27.18 2 Female 54.36
241 | 22.67 2 Male 45.34
242 | 17.82 1.75 Male 35.64
243 | 18.78 3 Female 37.56
[244 rows x 4 columns]
10."SELECT *, total_bill*2 AS total_bill_doubled FROM tips where sex = 'Female';"
dt11 = tips[f["sex"] == "Female",:][:, f[:].extend({"total_bill_doubled": f.total_bill * 2})]
print(dt11)
结果:
| total_bill tip sex smoker day time size total_bill_doubled
| float64 float64 str32 str32 str32 str32 int32 float64
-- + ---------- ------- ------ ------ ----- ------ ----- ------------------
0 | 16.99 1.01 Female No Sun Dinner 2 33.98
1 | 24.59 3.61 Female No Sun Dinner 4 49.18
2 | 35.26 5 Female No Sun Dinner 4 70.52
3 | 14.83 3.02 Female No Sun Dinner 2 29.66
4 | 10.33 1.67 Female No Sun Dinner 3 20.66
5 | 16.97 3.5 Female No Sun Dinner 3 33.94
6 | 20.29 2.75 Female No Sat Dinner 2 40.58
7 | 15.77 2.23 Female No Sat Dinner 2 31.54
8 | 19.65 3 Female No Sat Dinner 2 39.3
9 | 15.06 3 Female No Sat Dinner 2 30.12
10 | 20.69 2.45 Female No Sat Dinner 4 41.38
11 | 16.93 3.07 Female No Sat Dinner 3 33.86
12 | 10.29 2.6 Female No Sun Dinner 2 20.58
13 | 34.81 5.2 Female No Sun Dinner 4 69.62
14 | 26.41 1.5 Female No Sat Dinner 2 52.82
… | … … … … … … … …
82 | 10.09 2 Female Yes Fri Lunch 2 20.18
83 | 22.12 2.88 Female Yes Sat Dinner 2 44.24
84 | 35.83 4.67 Female No Sat Dinner 3 71.66
85 | 27.18 2 Female Yes Sat Dinner 2 54.36
86 | 18.78 3 Female No Thur Dinner 2 37.56
[87 rows x 8 columns]