In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
In [3]:
# ===================================================
# Einlesen und erste Analyse des Datensatzes
# -- Der Hotelbuchungs-Datensatz wird geladen
# -- Überblick über die Struktur, Form und fehlende Werte
# ===================================================
In [4]:
df = pd.read_csv(r'C:\Users\Miso\Desktop\Data Analysis 2/hotel_bookings.csv')
df.head()
Out[4]:
hotel is_canceled lead_time arrival_date_year arrival_date_month arrival_date_week_number arrival_date_day_of_month stays_in_weekend_nights stays_in_week_nights adults ... deposit_type agent company days_in_waiting_list customer_type adr required_car_parking_spaces total_of_special_requests reservation_status reservation_status_date
0 Resort Hotel 0 342 2015 July 27 1 0 0 2 ... No Deposit NaN NaN 0 Transient 0.0 0 0 Check-Out 7/1/2015
1 Resort Hotel 0 737 2015 July 27 1 0 0 2 ... No Deposit NaN NaN 0 Transient 0.0 0 0 Check-Out 7/1/2015
2 Resort Hotel 0 7 2015 July 27 1 0 1 1 ... No Deposit NaN NaN 0 Transient 75.0 0 0 Check-Out 7/2/2015
3 Resort Hotel 0 13 2015 July 27 1 0 1 1 ... No Deposit 304.0 NaN 0 Transient 75.0 0 0 Check-Out 7/2/2015
4 Resort Hotel 0 14 2015 July 27 1 0 2 2 ... No Deposit 240.0 NaN 0 Transient 98.0 0 1 Check-Out 7/3/2015

5 rows × 32 columns

In [5]:
df.shape
Out[5]:
(119390, 32)
In [6]:
df.isnull().values.any()
Out[6]:
np.True_
In [7]:
df.isnull().sum()
Out[7]:
hotel                                  0
is_canceled                            0
lead_time                              0
arrival_date_year                      0
arrival_date_month                     0
arrival_date_week_number               0
arrival_date_day_of_month              0
stays_in_weekend_nights                0
stays_in_week_nights                   0
adults                                 0
children                               4
babies                                 0
meal                                   0
country                              488
market_segment                         0
distribution_channel                   0
is_repeated_guest                      0
previous_cancellations                 0
previous_bookings_not_canceled         0
reserved_room_type                     0
assigned_room_type                     0
booking_changes                        0
deposit_type                           0
agent                              16340
company                           112593
days_in_waiting_list                   0
customer_type                          0
adr                                    0
required_car_parking_spaces            0
total_of_special_requests              0
reservation_status                     0
reservation_status_date                0
dtype: int64
In [8]:
df.fillna(0, inplace=True)
In [9]:
df['meal'].value_counts()
Out[9]:
meal
BB           92310
HB           14463
SC           10650
Undefined     1169
FB             798
Name: count, dtype: int64
In [10]:
df['children'].unique()
df['adults'].unique()
df['babies'].unique()
Out[10]:
array([ 0,  1,  2, 10,  9])
In [11]:
# ===================================================
# Ungültige Buchungen filtern
# -- Entfernt Buchungen ohne jegliche Gäste (Kinder, Erwachsene, Babys = 0)
# ===================================================
In [12]:
filter=(df['children']==0) & (df['adults']==0) & (df['babies']==0)
df[filter]
Out[12]:
hotel is_canceled lead_time arrival_date_year arrival_date_month arrival_date_week_number arrival_date_day_of_month stays_in_weekend_nights stays_in_week_nights adults ... deposit_type agent company days_in_waiting_list customer_type adr required_car_parking_spaces total_of_special_requests reservation_status reservation_status_date
2224 Resort Hotel 0 1 2015 October 41 6 0 3 0 ... No Deposit 0.0 174.0 0 Transient-Party 0.00 0 0 Check-Out 10/6/2015
2409 Resort Hotel 0 0 2015 October 42 12 0 0 0 ... No Deposit 0.0 174.0 0 Transient 0.00 0 0 Check-Out 10/12/2015
3181 Resort Hotel 0 36 2015 November 47 20 1 2 0 ... No Deposit 38.0 0.0 0 Transient-Party 0.00 0 0 Check-Out 11/23/2015
3684 Resort Hotel 0 165 2015 December 53 30 1 4 0 ... No Deposit 308.0 0.0 122 Transient-Party 0.00 0 0 Check-Out 1/4/2016
3708 Resort Hotel 0 165 2015 December 53 30 2 4 0 ... No Deposit 308.0 0.0 122 Transient-Party 0.00 0 0 Check-Out 1/5/2016
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
115029 City Hotel 0 107 2017 June 26 27 0 3 0 ... No Deposit 7.0 0.0 0 Transient 100.80 0 0 Check-Out 6/30/2017
115091 City Hotel 0 1 2017 June 26 30 0 1 0 ... No Deposit 0.0 0.0 0 Transient 0.00 1 1 Check-Out 7/1/2017
116251 City Hotel 0 44 2017 July 28 15 1 1 0 ... No Deposit 425.0 0.0 0 Transient 73.80 0 0 Check-Out 7/17/2017
116534 City Hotel 0 2 2017 July 28 15 2 5 0 ... No Deposit 9.0 0.0 0 Transient-Party 22.86 0 1 Check-Out 7/22/2017
117087 City Hotel 0 170 2017 July 30 27 0 2 0 ... No Deposit 52.0 0.0 0 Transient 0.00 0 0 Check-Out 7/29/2017

180 rows × 32 columns

In [13]:
data=df[~filter]
In [14]:
# ===================================================
# Gästeherkunft analysieren (nur gültige, nicht stornierte Buchungen)
# -- Filtert Daten nach Hoteltyp und ob storniert wurde
# ===================================================
In [15]:
resort=data[ (data['hotel']=='Resort Hotel') & (data['is_canceled']==0) ]
city=data[(data['hotel']=='City Hotel') & (data['is_canceled']==0)]
In [16]:
resort.head()
city.head()
Out[16]:
hotel is_canceled lead_time arrival_date_year arrival_date_month arrival_date_week_number arrival_date_day_of_month stays_in_weekend_nights stays_in_week_nights adults ... deposit_type agent company days_in_waiting_list customer_type adr required_car_parking_spaces total_of_special_requests reservation_status reservation_status_date
40060 City Hotel 0 6 2015 July 27 1 0 2 1 ... No Deposit 6.0 0.0 0 Transient 0.00 0 0 Check-Out 7/3/2015
40066 City Hotel 0 3 2015 July 27 2 0 3 1 ... No Deposit 1.0 0.0 0 Transient-Party 58.67 0 0 Check-Out 7/5/2015
40070 City Hotel 0 43 2015 July 27 3 0 2 2 ... No Deposit 1.0 0.0 0 Transient-Party 86.00 0 0 Check-Out 7/5/2015
40071 City Hotel 0 43 2015 July 27 3 0 2 2 ... No Deposit 1.0 0.0 0 Transient-Party 43.00 0 0 Check-Out 7/5/2015
40072 City Hotel 0 43 2015 July 27 3 0 2 2 ... No Deposit 1.0 0.0 0 Transient-Party 86.00 0 0 Check-Out 7/5/2015

5 rows × 32 columns

In [17]:
!pip install plotly
import plotly.graph_objs as go
from plotly.offline import iplot
import plotly.express as px
Requirement already satisfied: plotly in c:\users\miso\anaconda3\lib\site-packages (5.24.1)
Requirement already satisfied: tenacity>=6.2.0 in c:\users\miso\anaconda3\lib\site-packages (from plotly) (9.0.0)
Requirement already satisfied: packaging in c:\users\miso\anaconda3\lib\site-packages (from plotly) (24.2)
In [18]:
# ===================================================
# Kreisdiagramm zur Länderherkunft mit Plotly
# -- Visualisiert, aus welchen Ländern Gäste im Resort Hotel stammen
# ===================================================
In [19]:
labels=resort['country'].value_counts().index
values=resort['country'].value_counts()
In [20]:
trace=go.Pie(labels=labels, values=values,hoverinfo='label+percent', textinfo='value')
In [21]:
iplot([trace])
In [22]:
# ===================================================
# Choroplethenkarte: Anzahl Gäste nach Herkunftsland
# -- Visualisiert die geografische Verteilung der Gäste
# ===================================================
In [23]:
country_wise_data=data[data['is_canceled']==0]['country'].value_counts().reset_index()
country_wise_data.columns=['country','Number of Guests']
country_wise_data.head()
Out[23]:
country Number of Guests
0 PRT 20977
1 GBR 9668
2 FRA 8468
3 ESP 6383
4 DEU 6067
In [24]:
px.choropleth(country_wise_data,
              locations=country_wise_data['country'],
              color=country_wise_data['Number of Guests'],
              hover_name=country_wise_data['country'],
              title='Home country of Guests')
In [25]:
data.head()
Out[25]:
hotel is_canceled lead_time arrival_date_year arrival_date_month arrival_date_week_number arrival_date_day_of_month stays_in_weekend_nights stays_in_week_nights adults ... deposit_type agent company days_in_waiting_list customer_type adr required_car_parking_spaces total_of_special_requests reservation_status reservation_status_date
0 Resort Hotel 0 342 2015 July 27 1 0 0 2 ... No Deposit 0.0 0.0 0 Transient 0.0 0 0 Check-Out 7/1/2015
1 Resort Hotel 0 737 2015 July 27 1 0 0 2 ... No Deposit 0.0 0.0 0 Transient 0.0 0 0 Check-Out 7/1/2015
2 Resort Hotel 0 7 2015 July 27 1 0 1 1 ... No Deposit 0.0 0.0 0 Transient 75.0 0 0 Check-Out 7/2/2015
3 Resort Hotel 0 13 2015 July 27 1 0 1 1 ... No Deposit 304.0 0.0 0 Transient 75.0 0 0 Check-Out 7/2/2015
4 Resort Hotel 0 14 2015 July 27 1 0 2 2 ... No Deposit 240.0 0.0 0 Transient 98.0 0 1 Check-Out 7/3/2015

5 rows × 32 columns

In [26]:
# ===================================================
# Zimmerpreise pro Zimmerart und Hoteltyp
# -- Vergleicht durchschnittliche Tagespreise für Zimmer
# ===================================================
In [27]:
data2=data[data['is_canceled']==0]
In [28]:
plt.figure(figsize=(12,8))
sns.boxplot(x='reserved_room_type', y='adr', data=data2, hue='hotel')
plt.title('Price of Room Types per Night and per person', fontsize=16)
plt.xlabel('Room_type')
plt.ylabel('Price in [EUR]')
plt.show()
No description has been provided for this image
In [29]:
# ===================================================
# Preisentwicklung übers Jahr hinweg
# -- Zeigt, wie sich die durchschnittlichen Preise pro Monat verändern
# ===================================================
In [30]:
data_resort=resort[resort['is_canceled']==0]
data_city=city[city['is_canceled']==0]
In [31]:
resort_hotel=data_resort.groupby('arrival_date_month')['adr'].mean().reset_index()
city_hotel=data_city.groupby('arrival_date_month')['adr'].mean().reset_index()
In [32]:
city_hotel
Out[32]:
arrival_date_month adr
0 April 111.962267
1 August 118.674598
2 December 88.401855
3 February 86.520062
4 January 82.330983
5 July 115.818019
6 June 117.874360
7 March 90.658533
8 May 120.669827
9 November 86.946592
10 October 102.004672
11 September 112.776582
In [33]:
final=resort_hotel.merge(city_hotel,on='arrival_date_month')
final.columns=['month','price_for_resort','price_for_city_hotel']
final.head()
Out[33]:
month price_for_resort price_for_city_hotel
0 April 75.867816 111.962267
1 August 181.205892 118.674598
2 December 68.410104 88.401855
3 February 54.147478 86.520062
4 January 48.761125 82.330983
In [34]:
!pip install sorted-months-weekdays
!pip install sort-dataframeby-monthorweek
Requirement already satisfied: sorted-months-weekdays in c:\users\miso\anaconda3\lib\site-packages (0.2)
Requirement already satisfied: sort-dataframeby-monthorweek in c:\users\miso\anaconda3\lib\site-packages (0.4)
In [35]:
from sort_dataframeby_monthorweek import Sort_Dataframeby_Month
In [36]:
final2=Sort_Dataframeby_Month(final,'month')
final2.head()
Out[36]:
month price_for_resort price_for_city_hotel
0 January 48.761125 82.330983
1 February 54.147478 86.520062
2 March 57.056838 90.658533
3 April 75.867816 111.962267
4 May 76.657558 120.669827
In [37]:
px.line(final2,x='month', y=['price_for_resort','price_for_city_hotel'],title='Room price per night over the year')
In [38]:
# ===================================================
# Analyse der Aufenthaltsdauer nach Marktsegment
# -- Veranschaulicht, wie lange Gäste je nach Marktsegment bleiben
# ===================================================
In [39]:
plt.figure(figsize=(15,10))
sns.boxplot(x='market_segment',y='stays_in_weekend_nights', data=data, hue='hotel')
Out[39]:
<Axes: xlabel='market_segment', ylabel='stays_in_weekend_nights'>
No description has been provided for this image
In [40]:
data['meal'].value_counts()
Out[40]:
meal
BB           92236
HB           14458
SC           10549
Undefined     1169
FB             798
Name: count, dtype: int64
In [41]:
# ===================================================
# Verteilung der gebuchten Mahlzeiten
# -- Zeigt, welche Verpflegungsarten am häufigsten gebucht wurden
# ===================================================
In [42]:
px.pie(data_frame=
       data['meal'].value_counts().rename_axis('meal').reset_index(name='count'),
             values='count',
             names='meal',
             hole=0.5)
In [43]:
data.head()
Out[43]:
hotel is_canceled lead_time arrival_date_year arrival_date_month arrival_date_week_number arrival_date_day_of_month stays_in_weekend_nights stays_in_week_nights adults ... deposit_type agent company days_in_waiting_list customer_type adr required_car_parking_spaces total_of_special_requests reservation_status reservation_status_date
0 Resort Hotel 0 342 2015 July 27 1 0 0 2 ... No Deposit 0.0 0.0 0 Transient 0.0 0 0 Check-Out 7/1/2015
1 Resort Hotel 0 737 2015 July 27 1 0 0 2 ... No Deposit 0.0 0.0 0 Transient 0.0 0 0 Check-Out 7/1/2015
2 Resort Hotel 0 7 2015 July 27 1 0 1 1 ... No Deposit 0.0 0.0 0 Transient 75.0 0 0 Check-Out 7/2/2015
3 Resort Hotel 0 13 2015 July 27 1 0 1 1 ... No Deposit 304.0 0.0 0 Transient 75.0 0 0 Check-Out 7/2/2015
4 Resort Hotel 0 14 2015 July 27 1 0 2 2 ... No Deposit 240.0 0.0 0 Transient 98.0 0 1 Check-Out 7/3/2015

5 rows × 32 columns

In [44]:
# ===================================================
# Sonderwünsche und Stornierungen
# -- Zusammenhang zwischen Anzahl Sonderwünsche und Stornoverhalten
# ===================================================
In [52]:
sns.countplot(data=data.sample(1000), x='total_of_special_requests')
plt.show()
No description has been provided for this image
In [53]:
data.columns
Out[53]:
Index(['hotel', 'is_canceled', 'lead_time', 'arrival_date_year',
       'arrival_date_month', 'arrival_date_week_number',
       'arrival_date_day_of_month', 'stays_in_weekend_nights',
       'stays_in_week_nights', 'adults', 'children', 'babies', 'meal',
       'country', 'market_segment', 'distribution_channel',
       'is_repeated_guest', 'previous_cancellations',
       'previous_bookings_not_canceled', 'reserved_room_type',
       'assigned_room_type', 'booking_changes', 'deposit_type', 'agent',
       'company', 'days_in_waiting_list', 'customer_type', 'adr',
       'required_car_parking_spaces', 'total_of_special_requests',
       'reservation_status', 'reservation_status_date'],
      dtype='object')
In [57]:
pivot=data.groupby(['total_of_special_requests','is_canceled']).agg({'total_of_special_requests':'count'}).rename({'total_of_special_requests':'count'}).unstack()
pivot
Out[57]:
total_of_special_requests
is_canceled 0 1
total_of_special_requests
0 36667 33534
1 25867 7316
2 10086 2866
3 2049 445
4 304 36
5 38 2
In [58]:
pivot.plot(kind='bar')
Out[58]:
<Axes: xlabel='total_of_special_requests'>
No description has been provided for this image
In [59]:
# ===================================================
# Gästeströme über Monate analysieren
# -- Vergleich der Gästezahlen im Resort und City Hotel
# ===================================================
In [65]:
rush_resort=data_resort['arrival_date_month'].value_counts().reset_index()
rush_resort.columns=['month','number of guests']
rush_resort
Out[65]:
month number of guests
0 August 3257
1 July 3137
2 October 2575
3 March 2571
4 April 2550
5 May 2535
6 February 2308
7 September 2102
8 June 2037
9 December 2014
10 November 1975
11 January 1866
In [69]:
rush_city=data_city['arrival_date_month'].value_counts().reset_index()
rush_city.columns=['month','number of guests']
rush_city
Out[69]:
month number of guests
0 August 5367
1 July 4770
2 May 4568
3 June 4358
4 October 4326
5 September 4283
6 March 4049
7 April 4010
8 February 3051
9 November 2676
10 December 2377
11 January 2249
In [70]:
final_rush=rush_resort.merge(rush_city,on='month')
final_rush.columns=['month','number of guests in resort','number of guests in city hotel']
final_rush
Out[70]:
month number of guests in resort number of guests in city hotel
0 August 3257 5367
1 July 3137 4770
2 October 2575 4326
3 March 2571 4049
4 April 2550 4010
5 May 2535 4568
6 February 2308 3051
7 September 2102 4283
8 June 2037 4358
9 December 2014 2377
10 November 1975 2676
11 January 1866 2249
In [71]:
import sort_dataframeby_monthorweek as sd
In [72]:
final_rush2=sd.Sort_Dataframeby_Month(df=final_rush,monthcolumnname='month')
final_rush2
Out[72]:
month number of guests in resort number of guests in city hotel
0 January 1866 2249
1 February 2308 3051
2 March 2571 4049
3 April 2550 4010
4 May 2535 4568
5 June 2037 4358
6 July 3137 4770
7 August 3257 5367
8 September 2102 4283
9 October 2575 4326
10 November 1975 2676
11 December 2014 2377
In [73]:
final_rush2.columns
Out[73]:
Index(['month', 'number of guests in resort',
       'number of guests in city hotel'],
      dtype='object')
In [74]:
px.line(data_frame=final_rush2,x='month',y=['number of guests in resort','number of guests in city hotel'],title='Total number of guests per month')
In [75]:
filter=data['is_canceled']==0
clean_data=data[filter]
In [76]:
clean_data.head()
Out[76]:
hotel is_canceled lead_time arrival_date_year arrival_date_month arrival_date_week_number arrival_date_day_of_month stays_in_weekend_nights stays_in_week_nights adults ... deposit_type agent company days_in_waiting_list customer_type adr required_car_parking_spaces total_of_special_requests reservation_status reservation_status_date
0 Resort Hotel 0 342 2015 July 27 1 0 0 2 ... No Deposit 0.0 0.0 0 Transient 0.0 0 0 Check-Out 7/1/2015
1 Resort Hotel 0 737 2015 July 27 1 0 0 2 ... No Deposit 0.0 0.0 0 Transient 0.0 0 0 Check-Out 7/1/2015
2 Resort Hotel 0 7 2015 July 27 1 0 1 1 ... No Deposit 0.0 0.0 0 Transient 75.0 0 0 Check-Out 7/2/2015
3 Resort Hotel 0 13 2015 July 27 1 0 1 1 ... No Deposit 304.0 0.0 0 Transient 75.0 0 0 Check-Out 7/2/2015
4 Resort Hotel 0 14 2015 July 27 1 0 2 2 ... No Deposit 240.0 0.0 0 Transient 98.0 0 1 Check-Out 7/3/2015

5 rows × 32 columns

In [77]:
# ===================================================
# Aufenthaltsdauer vs. Hoteltyp
# -- Wie lange Gäste durchschnittlich bleiben – nach Hoteltyp
# ===================================================
In [78]:
clean_data['total_nights']=clean_data['stays_in_weekend_nights'] + clean_data['stays_in_week_nights']
C:\Users\Miso\AppData\Local\Temp\ipykernel_9756\3138704394.py:1: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

In [79]:
import warnings
from warnings import filterwarnings
filterwarnings('ignore')
In [80]:
clean_data.head()
Out[80]:
hotel is_canceled lead_time arrival_date_year arrival_date_month arrival_date_week_number arrival_date_day_of_month stays_in_weekend_nights stays_in_week_nights adults ... agent company days_in_waiting_list customer_type adr required_car_parking_spaces total_of_special_requests reservation_status reservation_status_date total_nights
0 Resort Hotel 0 342 2015 July 27 1 0 0 2 ... 0.0 0.0 0 Transient 0.0 0 0 Check-Out 7/1/2015 0
1 Resort Hotel 0 737 2015 July 27 1 0 0 2 ... 0.0 0.0 0 Transient 0.0 0 0 Check-Out 7/1/2015 0
2 Resort Hotel 0 7 2015 July 27 1 0 1 1 ... 0.0 0.0 0 Transient 75.0 0 0 Check-Out 7/2/2015 1
3 Resort Hotel 0 13 2015 July 27 1 0 1 1 ... 304.0 0.0 0 Transient 75.0 0 0 Check-Out 7/2/2015 1
4 Resort Hotel 0 14 2015 July 27 1 0 2 2 ... 240.0 0.0 0 Transient 98.0 0 1 Check-Out 7/3/2015 2

5 rows × 33 columns

In [81]:
stay=clean_data.groupby(['total_nights','hotel']).agg('count').reset_index()
stay=stay.iloc[:,0:3]
stay.head()
Out[81]:
total_nights hotel is_canceled
0 0 City Hotel 251
1 0 Resort Hotel 371
2 1 City Hotel 9155
3 1 Resort Hotel 6579
4 2 City Hotel 10983
In [83]:
stay=stay.rename(columns={'is_canceled':'number of stays'})
stay.head()
Out[83]:
total_nights hotel number of stays
0 0 City Hotel 251
1 0 Resort Hotel 371
2 1 City Hotel 9155
3 1 Resort Hotel 6579
4 2 City Hotel 10983
In [100]:
plt.figure(figsize=(15,8))
sns.barplot(x='total_nights',y='number of stays',hue='hotel',hue_order=['City Hotel','Resort Hotel'],data=stay)
Out[100]:
<Axes: xlabel='total_nights', ylabel='number of stays'>
No description has been provided for this image
In [85]:
clean_data.columns
Out[85]:
Index(['hotel', 'is_canceled', 'lead_time', 'arrival_date_year',
       'arrival_date_month', 'arrival_date_week_number',
       'arrival_date_day_of_month', 'stays_in_weekend_nights',
       'stays_in_week_nights', 'adults', 'children', 'babies', 'meal',
       'country', 'market_segment', 'distribution_channel',
       'is_repeated_guest', 'previous_cancellations',
       'previous_bookings_not_canceled', 'reserved_room_type',
       'assigned_room_type', 'booking_changes', 'deposit_type', 'agent',
       'company', 'days_in_waiting_list', 'customer_type', 'adr',
       'required_car_parking_spaces', 'total_of_special_requests',
       'reservation_status', 'reservation_status_date', 'total_nights'],
      dtype='object')
In [86]:
clean_data['market_segment'].value_counts()
Out[86]:
market_segment
Online TA        35673
Offline TA/TO    15880
Direct           10648
Groups            7697
Corporate         4291
Complementary      639
Aviation           183
Name: count, dtype: int64
In [87]:
# ===================================================
# Buchungen nach Marktsegment
# -- Verteilung und durchschnittlicher Preis je Segment und Zimmerart
# ===================================================
In [89]:
segment_counts = clean_data['market_segment'].value_counts().reset_index()
segment_counts.columns = ['market_segment', 'count']

# Step 2: Pass this to px.pie
fig = px.pie(
    segment_counts,
    names='market_segment',
    values='count',
    title='Bookings per Market Segment'
)

fig.show()
In [90]:
clean_data.columns
Out[90]:
Index(['hotel', 'is_canceled', 'lead_time', 'arrival_date_year',
       'arrival_date_month', 'arrival_date_week_number',
       'arrival_date_day_of_month', 'stays_in_weekend_nights',
       'stays_in_week_nights', 'adults', 'children', 'babies', 'meal',
       'country', 'market_segment', 'distribution_channel',
       'is_repeated_guest', 'previous_cancellations',
       'previous_bookings_not_canceled', 'reserved_room_type',
       'assigned_room_type', 'booking_changes', 'deposit_type', 'agent',
       'company', 'days_in_waiting_list', 'customer_type', 'adr',
       'required_car_parking_spaces', 'total_of_special_requests',
       'reservation_status', 'reservation_status_date', 'total_nights'],
      dtype='object')
In [91]:
plt.figure(figsize=(15,8))
sns.barplot(x='market_segment',y='adr',hue='reserved_room_type',data=clean_data)
Out[91]:
<Axes: xlabel='market_segment', ylabel='adr'>
No description has been provided for this image
In [92]:
cancel=data[data['is_canceled']==1]
cancel.head()
Out[92]:
hotel is_canceled lead_time arrival_date_year arrival_date_month arrival_date_week_number arrival_date_day_of_month stays_in_weekend_nights stays_in_week_nights adults ... deposit_type agent company days_in_waiting_list customer_type adr required_car_parking_spaces total_of_special_requests reservation_status reservation_status_date
8 Resort Hotel 1 85 2015 July 27 1 0 3 2 ... No Deposit 240.0 0.0 0 Transient 82.0 0 1 Canceled 5/6/2015
9 Resort Hotel 1 75 2015 July 27 1 0 3 2 ... No Deposit 15.0 0.0 0 Transient 105.5 0 0 Canceled 4/22/2015
10 Resort Hotel 1 23 2015 July 27 1 0 4 2 ... No Deposit 240.0 0.0 0 Transient 123.0 0 0 Canceled 6/23/2015
27 Resort Hotel 1 60 2015 July 27 1 2 5 2 ... No Deposit 240.0 0.0 0 Transient 107.0 0 2 Canceled 5/11/2015
32 Resort Hotel 1 96 2015 July 27 1 2 8 2 ... No Deposit 0.0 0.0 0 Transient 108.3 0 2 Canceled 5/29/2015

5 rows × 32 columns

In [93]:
# ===================================================
# Stornierungen nach Hoteltyp
# -- Wie viele Buchungen wurden je Hotel storniert?
# ===================================================
In [94]:
len(cancel[cancel['hotel']=='Resort Hotel'])
len(cancel[cancel['hotel']=='City Hotel'])
Out[94]:
33079
In [99]:
rh_cancellations = 11120
ch_cancellations = 33079

# Labels as strings
fig = px.pie(
    values=[rh_cancellations, ch_cancellations],
    names=['Resort Hotel', 'City Hotel'],  # or ['RH', 'CH'] if you prefer short
    title='Cancellations by Hotel Type'
)

fig.show()
In [ ]: