hidden cell
# Write and run code here
hidden cell
hidden cell
# Write and run code here
Gender Customer Type Age Type of Travel Class \
id
0 Male disloyal Customer 48 Business travel Business
1 Male Loyal Customer 50 Business travel Business
2 Male Loyal Customer 43 Business travel Business
3 Female Loyal Customer 38 Business travel Business
4 Male disloyal Customer 24 Business travel Eco
Flight Distance Inflight wifi service Departure/Arrival time convenient \
id
0 821 3 3
1 1905 2 2
2 1963 3 3
3 2822 2 2
4 453 2 2
Ease of Online booking Gate location ... Inflight entertainment \
id ...
0 3 3 ... 5
1 2 2 ... 5
2 3 3 ... 5
3 5 2 ... 5
4 2 4 ... 5
On-board service Leg room service Baggage handling Checkin service \
id
0 3 2 5 4
1 5 5 5 3
2 5 5 5 4
3 5 5 5 3
4 2 4 4 2
Inflight service Cleanliness Departure Delay in Minutes \
id
0 5 5 2
1 5 4 0
2 5 4 0
3 5 4 13
4 4 5 16
Arrival Delay in Minutes satisfaction
id
0 5.0 neutral or dissatisfied
1 0.0 satisfied
2 0.0 satisfied
3 0.0 satisfied
4 30.0 neutral or dissatisfied
[5 rows x 23 columns]
hidden cell
# Write and run code here
Gender Customer Type Age Type of Travel Class \
id
43289 Male Loyal Customer 47 Business travel Eco Plus
43290 Female disloyal Customer 23 Business travel Business
43291 Female Loyal Customer 28 Personal Travel Eco Plus
43292 Male Loyal Customer 41 Personal Travel Eco Plus
43293 Female Loyal Customer 20 Personal Travel Eco Plus
Flight Distance Inflight wifi service \
id
43289 447 3
43290 337 2
43291 337 4
43292 308 3
43293 337 3
Departure/Arrival time convenient Ease of Online booking \
id
43289 4 4
43290 2 2
43291 2 4
43292 5 3
43293 1 3
Gate location ... Inflight entertainment On-board service \
id ...
43289 4 ... 4 2
43290 4 ... 5 2
43291 4 ... 3 1
43292 4 ... 2 5
43293 2 ... 2 4
Leg room service Baggage handling Checkin service Inflight service \
id
43289 5 3 1 4
43290 1 4 2 4
43291 5 3 4 4
43292 5 5 5 4
43293 4 1 4 2
Cleanliness Departure Delay in Minutes Arrival Delay in Minutes \
id
43289 4 0 0.0
43290 5 46 58.0
43291 3 0 0.0
43292 2 0 0.0
43293 2 0 0.0
satisfaction
id
43289 neutral or dissatisfied
43290 neutral or dissatisfied
43291 neutral or dissatisfied
43292 neutral or dissatisfied
43293 neutral or dissatisfied
[5 rows x 23 columns]
hidden cell
hidden cell
# Write and run code here
null_count null_percent
Arrival Delay in Minutes 136 0.314131
hidden cell
# Write and run code here
Gender 0
Customer Type 0
Age 0
Type of Travel 0
Class 0
Flight Distance 0
Inflight wifi service 0
Departure/Arrival time convenient 0
Ease of Online booking 0
Gate location 0
Food and drink 0
Online boarding 0
Seat comfort 0
Inflight entertainment 0
On-board service 0
Leg room service 0
Baggage handling 0
Checkin service 0
Inflight service 0
Cleanliness 0
Departure Delay in Minutes 0
Arrival Delay in Minutes 0
satisfaction 0
dtype: int64
hidden cell
# Write and run code here
array(['gender', 'customer_type', 'age', 'type_of_travel', 'class',
'flight_distance', 'inflight_wifi_service',
'departure/arrival_time_convenient', 'ease_of_online_booking',
'gate_location', 'food_and_drink', 'online_boarding',
'seat_comfort', 'inflight_entertainment', 'on-board_service',
'leg_room_service', 'baggage_handling', 'checkin_service',
'inflight_service', 'cleanliness', 'departure_delay_in_minutes',
'arrival_delay_in_minutes', 'satisfaction'], dtype=object)
hidden cell
# Write and run code here
<class 'pandas.core.frame.DataFrame'>
Int64Index: 43294 entries, 0 to 43293
Data columns (total 23 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 gender 43294 non-null object
1 customer_type 43294 non-null object
2 age 43294 non-null int64
3 type_of_travel 43294 non-null object
4 class 43294 non-null object
5 flight_distance 43294 non-null int64
6 inflight_wifi_service 43294 non-null int64
7 departure/arrival_time_convenient 43294 non-null int64
8 ease_of_online_booking 43294 non-null int64
9 gate_location 43294 non-null int64
10 food_and_drink 43294 non-null int64
11 online_boarding 43294 non-null int64
12 seat_comfort 43294 non-null int64
13 inflight_entertainment 43294 non-null int64
14 on-board_service 43294 non-null int64
15 leg_room_service 43294 non-null int64
16 baggage_handling 43294 non-null int64
17 checkin_service 43294 non-null int64
18 inflight_service 43294 non-null int64
19 cleanliness 43294 non-null int64
20 departure_delay_in_minutes 43294 non-null int64
21 arrival_delay_in_minutes 43294 non-null float64
22 satisfaction 43294 non-null object
dtypes: float64(1), int64(17), object(5)
memory usage: 7.9+ MB
None
hidden cell
hidden cell
# Write and run code here
<class 'pandas.core.frame.DataFrame'>
Int64Index: 43294 entries, 0 to 43293
Data columns (total 23 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 gender 43294 non-null category
1 customer_type 43294 non-null category
2 age 43294 non-null int64
3 type_of_travel 43294 non-null category
4 class 43294 non-null category
5 flight_distance 43294 non-null int64
6 inflight_wifi_service 43294 non-null category
7 departure/arrival_time_convenient 43294 non-null category
8 ease_of_online_booking 43294 non-null category
9 gate_location 43294 non-null category
10 food_and_drink 43294 non-null category
11 online_boarding 43294 non-null category
12 seat_comfort 43294 non-null category
13 inflight_entertainment 43294 non-null category
14 on-board_service 43294 non-null category
15 leg_room_service 43294 non-null category
16 baggage_handling 43294 non-null category
17 checkin_service 43294 non-null category
18 inflight_service 43294 non-null category
19 cleanliness 43294 non-null category
20 departure_delay_in_minutes 43294 non-null int64
21 arrival_delay_in_minutes 43294 non-null float64
22 satisfaction 43294 non-null object
dtypes: category(18), float64(1), int64(3), object(1)
memory usage: 2.7+ MB
None
hidden cell
# Write and run code here
age flight_distance departure_delay_in_minutes \
count 43294.000000 43294.000000 43294.000000
mean 39.521828 1189.167598 14.912066
std 15.103227 994.788266 38.969990
min 7.000000 31.000000 0.000000
25% 27.000000 414.000000 0.000000
50% 40.000000 844.000000 0.000000
75% 51.000000 1744.000000 12.000000
max 85.000000 4983.000000 1592.000000
arrival_delay_in_minutes
count 43294.000000
mean 15.234812
std 39.179657
min 0.000000
25% 0.000000
50% 0.000000
75% 13.000000
max 1584.000000
hidden cell
# Write and run code here
gender customer_type type_of_travel class \
count 43294 43294 43294 43294
unique 2 2 2 3
top Female Loyal Customer Business travel Business
freq 21987 35417 29929 20700
inflight_wifi_service departure/arrival_time_convenient \
count 43294 43294
unique 6 6
top 3 4
freq 10787 10576
ease_of_online_booking gate_location food_and_drink \
count 43294 43294 43294
unique 6 5 6
top 2 3 4
freq 10056 11878 10292
online_boarding seat_comfort inflight_entertainment \
count 43294 43294 43294
unique 6 5 6
top 4 4 4
freq 12798 13330 12304
on-board_service leg_room_service baggage_handling checkin_service \
count 43294 43294 43294 43294
unique 6 6 5 5
top 4 4 4 4
freq 12908 11849 15542 12023
inflight_service cleanliness
count 43294 43294
unique 6 6
top 4 4
freq 15776 11349
hidden cell
# Write and run code here
hidden cell
hidden cell
# Write and run code here
hidden cell
# Write and run code here
feature VIF
0 age 2.292531
1 flight_distance 2.212611
2 departure_delay_in_minutes 14.181102
3 arrival_delay_in_minutes 14.216816
hidden cell
hidden cell
# Write and run code here
hidden cell
# Write and run code here
{'neutral or dissatisfied': 0, 'satisfied': 1}
hidden cell
# Write and run code here
id
0 0
1 1
2 1
3 1
4 0
Name: satisfaction, dtype: int64
hidden cell
hidden cell
# Write and run code here
hidden cell
# Write and run code here
hidden cell
# Write and run code here
hidden cell
# Write and run code here
hidden cell
hidden cell
# Write and run code here
hidden cell
# Write and run code here
p_value is_correlated
gender 0.009381 True
customer_type 0.000000 True
type_of_travel 0.000000 True
class 0.000000 True
hidden cell
hidden cell
# Write and run code here
hidden cell
# Write and run code here
id
0 3
1 3
2 3
3 2
4 1
Name: age_group, dtype: int8
hidden cell
hidden cell
# Write and run code here
gender customer_type type_of_travel class flight_distance \
id
0 Male disloyal Customer Business travel Business 821
1 Male Loyal Customer Business travel Business 1905
2 Male Loyal Customer Business travel Business 1963
3 Female Loyal Customer Business travel Business 2822
4 Male disloyal Customer Business travel Eco 453
inflight_wifi_service departure/arrival_time_convenient \
id
0 3 3
1 2 2
2 3 3
3 2 2
4 2 2
ease_of_online_booking gate_location food_and_drink ... seat_comfort \
id ...
0 3 3 5 ... 5
1 2 2 4 ... 5
2 3 3 5 ... 5
3 5 2 2 ... 4
4 2 4 5 ... 5
on-board_service leg_room_service baggage_handling checkin_service \
id
0 3 2 5 4
1 5 5 5 3
2 5 5 5 4
3 5 5 5 3
4 2 4 4 2
inflight_service cleanliness departure_delay_in_minutes satisfaction \
id
0 5 5 2 0
1 5 4 0 1
2 5 4 0 1
3 5 4 13 1
4 4 5 16 0
age_group
id
0 3
1 3
2 3
3 2
4 1
[5 rows x 21 columns]
hidden cell
# Write and run code here
flight_distance inflight_wifi_service departure/arrival_time_convenient \
id
0 821 3 3
1 1905 2 2
2 1963 3 3
3 2822 2 2
4 453 2 2
ease_of_online_booking gate_location food_and_drink online_boarding \
id
0 3 3 5 3
1 2 2 4 4
2 3 3 5 4
3 5 2 2 5
4 2 4 5 2
seat_comfort on-board_service leg_room_service ... inflight_service \
id ...
0 5 3 2 ... 5
1 5 5 5 ... 5
2 5 5 5 ... 5
3 4 5 5 ... 5
4 5 2 4 ... 4
cleanliness departure_delay_in_minutes satisfaction age_group \
id
0 5 2 0 3
1 4 0 1 3
2 4 0 1 3
3 4 13 1 2
4 5 16 0 1
gender_male customer_type_disloyal_customer \
id
0 1 1
1 1 0
2 1 0
3 0 0
4 1 1
type_of_travel_personal_travel class_eco class_eco_plus
id
0 0 0 0
1 0 0 0
2 0 0 0
3 0 0 0
4 0 1 0
[5 rows x 22 columns]
hidden cell
# Write and run code here
hidden cell
hidden cell
# Write and run code here
hidden cell
# Write and run code here
Best accuracy: 92.05%, best params: {'knn__n_neighbors': 9}
Test accuracy: 92.47%
hidden cell
# Write and run code here
hidden cell
# Write and run code here
Best accuracy: 87.43%, best params: {'lr__C': 0.05}
Test accuracy: 87.32%
hidden cell
# Write and run code here
hidden cell
# Write and run code here
Test accuracy: 95.66%, best number of trees: 160
hidden cell
# Write and run code here
hidden cell
# Write and run code here
hidden cell
# Write and run code here
accuracy precision recall f1_score
random_forest 0.956582 0.970083 0.928912 0.949051
knn 0.924711 0.956649 0.866313 0.909243
logistic_regression 0.873210 0.873602 0.828647 0.850531
hidden cell
# Write and run code here
hidden cell