import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import os

# =======================================================
# 📂 Laden und Filtern der CSV-Dateien
# - Listet die letzten 8 Dateien im Uber-Datenordner auf
# - Entfernt unerwünschte Dateien aus der Analyse
# =======================================================

# os.listdir returns directory entries in arbitrary order, so sort the names
# before slicing to make the "last 8 files" selection deterministic.
files = sorted(os.listdir(r'C:\Users\Miso\Desktop\Data Analysis 1\UberDataSet'))[-8:]
files

['uber-raw-data-apr14.csv',
 'uber-raw-data-aug14.csv',
 'uber-raw-data-janjune-15.csv',
 'uber-raw-data-janjune-15_sample.csv',
 'uber-raw-data-jul14.csv',
 'uber-raw-data-jun14.csv',
 'uber-raw-data-may14.csv',
 'uber-raw-data-sep14.csv']

# Drop the two 'janjune-15' files (sample and full) from the selection.
for unwanted in ('uber-raw-data-janjune-15_sample.csv',
                 'uber-raw-data-janjune-15.csv'):
    files.remove(unwanted)

files

['uber-raw-data-apr14.csv',
 'uber-raw-data-aug14.csv',
 'uber-raw-data-jul14.csv',
 'uber-raw-data-jun14.csv',
 'uber-raw-data-may14.csv',
 'uber-raw-data-sep14.csv']

# =======================================================
# 📁 Zusammenführen der ausgewählten CSV-Dateien
# - Liest jede Datei ein
# - Hängt alle Daten zu einem großen DataFrame `final` zusammen
# =======================================================

path = r'C:\Users\Miso\Desktop\Data Analysis 1\UberDataSet'

# Read every selected CSV first and concatenate once: calling pd.concat inside
# the loop re-copies the accumulated frame on every iteration.
# The files are read in reverse so that the resulting row order matches the
# previous prepend-style accumulation (last file first).
frames = [
    pd.read_csv(os.path.join(path, file), encoding='utf-8')
    for file in reversed(files)
]

# pd.concat raises on an empty list, so fall back to an empty frame.
final = pd.concat(frames) if frames else pd.DataFrame()

final.shape

(4534327, 4)

# Work on a copy so that later column additions to `df` do not modify `final`.
df = final.copy()

df.head()

df.dtypes

Date/Time     object
Lat          float64
Lon          float64
Base          object
dtype: object

# Parse the 'Date/Time' strings into datetimes using an explicit
# month/day/year hour:minute:second format.
df['Date/Time'] = pd.to_datetime(df['Date/Time'],format='%m/%d/%Y %H:%M:%S')

df.dtypes

Date/Time    datetime64[ns]
Lat                 float64
Lon                 float64
Base                 object
dtype: object

df.head()

# =======================================================
# 🧹 Datenvorverarbeitung
# - Die Spalte 'Date/Time' wurde oben bereits in ein datetime-Format umgewandelt
# - Extraktion von Zeitkomponenten für spätere Analysen
# =======================================================

# Derive the calendar/time columns from the parsed timestamps via the
# datetime accessor of the 'Date/Time' column.
stamp = df['Date/Time'].dt
df['Weekday'] = stamp.day_name()  # weekday as a name, e.g. 'Monday'
df['Month'] = stamp.month
df['Day'] = stamp.day
df['Hour'] = stamp.hour
df['Minute'] = stamp.minute

df.head()

df['Weekday'].value_counts()

Weekday
Thursday     755145
Friday       741139
Wednesday    696488
Tuesday      663789
Saturday     646114
Monday       541472
Sunday       490180
Name: count, dtype: int64

# =======================================================
# 📊 Wochentagsverteilung visualisieren
# - Zählt Fahrten pro Wochentag
# - Erstellt eine interaktive Balkengrafik mit Plotly
# =======================================================

# Count the trips per weekday once and reuse the result for both axes
# instead of running value_counts() twice over the full frame.
weekday_counts = df['Weekday'].value_counts()
px.bar(x=weekday_counts.index,
       y=weekday_counts
      )

# =======================================================
# 🕒 Stundenverteilung pro Monat
# - Zeigt in Histogrammen, wann (zu welcher Uhrzeit) im jeweiligen Monat die meisten Fahrten stattfanden
# =======================================================

# One hour-of-day histogram per month, in ascending month order.
months = sorted(df['Month'].unique())
n_cols = 2
# Derive the number of rows from the data instead of hard-coding a 3x2 grid.
n_rows = max(1, (len(months) + n_cols - 1) // n_cols)

plt.figure(figsize=(40,20))

for i, month in enumerate(months, start=1):
    plt.subplot(n_rows, n_cols, i)
    # One bin per hour (0-23); the default of 10 bins merges 2-3 hours per bar.
    df[df['Month']==month]['Hour'].hist(bins=range(25))
    # Label each panel so the months can be told apart.
    plt.title('Month {}'.format(month))

!pip install chart-studio
import chart_studio.plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

Requirement already satisfied: chart-studio in c:\users\miso\anaconda3\lib\site-packages (1.1.0)
Requirement already satisfied: plotly in c:\users\miso\anaconda3\lib\site-packages (from chart-studio) (5.24.1)
Requirement already satisfied: requests in c:\users\miso\anaconda3\lib\site-packages (from chart-studio) (2.32.3)
Requirement already satisfied: retrying>=1.3.3 in c:\users\miso\anaconda3\lib\site-packages (from chart-studio) (1.4.1)
Requirement already satisfied: six in c:\users\miso\anaconda3\lib\site-packages (from chart-studio) (1.17.0)
Requirement already satisfied: tenacity>=6.2.0 in c:\users\miso\anaconda3\lib\site-packages (from plotly->chart-studio) (9.0.0)
Requirement already satisfied: packaging in c:\users\miso\anaconda3\lib\site-packages (from plotly->chart-studio) (24.2)
Requirement already satisfied: charset-normalizer<4,>=2 in c:\users\miso\anaconda3\lib\site-packages (from requests->chart-studio) (3.3.2)
Requirement already satisfied: idna<4,>=2.5 in c:\users\miso\anaconda3\lib\site-packages (from requests->chart-studio) (3.7)
Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\miso\anaconda3\lib\site-packages (from requests->chart-studio) (2.3.0)
Requirement already satisfied: certifi>=2017.4.17 in c:\users\miso\anaconda3\lib\site-packages (from requests->chart-studio) (2025.7.14)

df.groupby('Month')['Hour'].count()

Month
4     564516
5     652435
6     663844
7     796121
8     829275
9    1028136
Name: Hour, dtype: int64

# =======================================================
# 📈 Monatsverteilung der Fahrtenanzahl
# - Zeigt, wie viele Fahrten es pro Monat gibt
# - Interaktive Darstellung mit Plotly
# =======================================================

# Count the rows per month once and reuse the result for both axes instead of
# repeating the same groupby/count twice.
trips_per_month = df.groupby('Month')['Hour'].count()

trace = go.Bar(
    x=trips_per_month.index,
    y=trips_per_month,
    name='Priority'
)

iplot([trace])

# =======================================================
# 📅 Verteilung der Fahrten innerhalb eines Monats
# - Histogramm der Fahrtenanzahl pro Tag des Monats (alle Monate zusammen)
# =======================================================

plt.figure(figsize=(10,8))
# Day-of-month values run from 1 to 31, so use 31 unit-wide bins centred on
# each day. With bins=30 and no explicit range, the bin edges were 1, 2, ..., 31
# and days 30 and 31 were counted together in the last bin.
plt.hist(df['Day'], bins=31, range=(0.5, 31.5), rwidth=0.5)
plt.xlabel('Date of the month')
plt.ylabel('Total Journeys')
plt.title('Journeys by month day')

Text(0.5, 1.0, 'Journeys by month day')

sns.displot(df['Day'])

<seaborn.axisgrid.FacetGrid at 0x1b7857d16a0>

# =======================================================
# 📅📦 Tagesverteilung pro Monat
# - Histogramm für jeden Monat: Welche Tage im Monat waren besonders fahrtenintensiv?
# =======================================================

# One day-of-month histogram per month, in ascending month order.
months = sorted(df['Month'].unique())
n_cols = 2
# Derive the number of rows from the data instead of hard-coding a 3x2 grid.
n_rows = max(1, (len(months) + n_cols - 1) // n_cols)

plt.figure(figsize=(40,20))

for i, month in enumerate(months, start=1):
    plt.subplot(n_rows, n_cols, i)
    month_trips = df[df['Month']==month]
    # One bin per calendar day (1-31); the default of 10 bins lumps roughly
    # three days into every bar.
    plt.hist(month_trips['Day'], bins=31, range=(0.5, 31.5))
    # The label previously rendered as e.g. 'Days in Month9' (missing space).
    plt.xlabel('Days in Month {}'.format(month))
    plt.ylabel('total_rides')

# =======================================================
# 📍 Zusammenhang von Uhrzeit und geographischer Breite
# - Pointplot (kein Scatterplot) zur Untersuchung des Zusammenhangs zwischen Uhrzeit und Breitengrad, getrennt nach Wochentag
# =======================================================

plt.figure(figsize=(12,6))
ax = sns.pointplot(x='Hour', y='Lat', data=df, hue='Weekday')
# pyplot has no set_title(); the title was previously disabled for that reason.
# Set it on the Axes object returned by seaborn instead.
ax.set_title('Hours of day vs Latitude of Passenger')

<Axes: xlabel='Hour', ylabel='Lat'>

# =======================================================
# 📊 Fahrtenanzahl nach Monat und Basisstation
# - Gruppiert Fahrten nach Monat und Base-Code
# - Liniendiagramm zur Visualisierung
# =======================================================

# Count the rows for every (Base, Month) combination; the count ends up in the
# 'Date/Time' column after the index is flattened back into columns.
rows_per_base_month = df.groupby(['Base', 'Month'])['Date/Time'].count()
base = rows_per_base_month.reset_index()
base

# One line per Base, showing the count over the months.
plt.figure(figsize=(10, 6))
sns.lineplot(data=base, x='Month', y='Date/Time', hue='Base')

<Axes: xlabel='Month', ylabel='Date/Time'>

# =======================================================
# 🔥 Heatmap: Anzahl der Fahrten pro Wochentag und Stunde
# - Gruppiert Fahrten nach Tag und Stunde
# - Erstellt eine Heatmap
# =======================================================

def count_rows(rows):
    """Return the number of items in *rows* (any object that supports ``len``)."""
    n_rows = len(rows)
    return n_rows

# size() counts the rows of every (Weekday, Hour) group directly. It yields the
# same Series as apply(count_rows) but avoids the DeprecationWarning that
# DataFrameGroupBy.apply emits for operating on the grouping columns.
by_cross = df.groupby(['Weekday', 'Hour']).size()
by_cross

C:\Users\Miso\AppData\Local\Temp\ipykernel_11320\3792439153.py:1: DeprecationWarning:

DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.

Weekday    Hour
Friday     0       13716
           1        8163
           2        5350
           3        6930
           4        8806
                   ...  
Wednesday  19      47017
           20      47772
           21      44553
           22      32868
           23      18146
Length: 168, dtype: int64

# Move the innermost index level of the counts into the columns so that the
# result is a 2-D table suitable for a heatmap.
pivot = by_cross.unstack(level=-1)
pivot

plt.figure(figsize=(12, 6))
sns.heatmap(data=pivot)

<Axes: xlabel='Hour', ylabel='Weekday'>

def heatmap(col1, col2):
    """Draw a heatmap of row counts for every (col1, col2) combination in ``df``.

    Args:
        col1: Column of the module-level ``df`` that becomes the heatmap rows.
        col2: Column of the module-level ``df`` that becomes the heatmap columns.

    Returns:
        The Axes object returned by ``sns.heatmap``.
    """
    # size() counts rows per group without triggering the DeprecationWarning
    # that DataFrameGroupBy.apply emits for operating on the grouping columns.
    by_cross = df.groupby([col1, col2]).size()
    pivot = by_cross.unstack()
    plt.figure(figsize=(12,6))
    return sns.heatmap(pivot)

heatmap('Day', 'Hour')

C:\Users\Miso\AppData\Local\Temp\ipykernel_11320\3216948876.py:2: DeprecationWarning:

DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.

<Axes: xlabel='Hour', ylabel='Day'>

# =======================================================
# 🗺️ Darstellung aller Uber-Fahrten als Punktekarte
# - Sehr fein aufgelöster Scatterplot der GPS-Koordinaten
# =======================================================

plt.figure(figsize=(12, 6))
# Draw every (Lon, Lat) pair as a very small red '+' marker, without lines.
plt.plot(df['Lon'], df['Lat'], 'r+', markersize=0.5)
# Restrict the visible window to the longitude/latitude limits below.
plt.xlim(left=-74.2, right=-73.7)
plt.ylim(bottom=40.6, top=41)

(40.6, 41.0)

# =======================================================
# 🗺️ Heatmap für Fahrten an Sonntagen
# - Gruppiert nach GPS-Koordinaten
# - Nutzt Folium zur Erstellung einer interaktiven Karte
# =======================================================

# Keep only the rows whose weekday is Sunday.
is_sunday = df['Weekday'] == 'Sunday'
df_out = df[is_sunday]

# Count the rows for every distinct (Lat, Lon) pair and expose the count as
# the 'Number of Trips' column.
rush = (
    df_out.groupby(['Lat', 'Lon'])['Weekday']
    .count()
    .reset_index(name='Number of Trips')
)
rush

!pip install folium
from folium.plugins import HeatMap
import folium
basemap=folium.Map()

Requirement already satisfied: folium in c:\users\miso\anaconda3\lib\site-packages (0.20.0)
Requirement already satisfied: branca>=0.6.0 in c:\users\miso\anaconda3\lib\site-packages (from folium) (0.8.1)
Requirement already satisfied: jinja2>=2.9 in c:\users\miso\anaconda3\lib\site-packages (from folium) (3.1.6)
Requirement already satisfied: numpy in c:\users\miso\anaconda3\lib\site-packages (from folium) (2.1.3)
Requirement already satisfied: requests in c:\users\miso\anaconda3\lib\site-packages (from folium) (2.32.3)
Requirement already satisfied: xyzservices in c:\users\miso\anaconda3\lib\site-packages (from folium) (2022.9.0)
Requirement already satisfied: MarkupSafe>=2.0 in c:\users\miso\anaconda3\lib\site-packages (from jinja2>=2.9->folium) (3.0.2)
Requirement already satisfied: charset-normalizer<4,>=2 in c:\users\miso\anaconda3\lib\site-packages (from requests->folium) (3.3.2)
Requirement already satisfied: idna<4,>=2.5 in c:\users\miso\anaconda3\lib\site-packages (from requests->folium) (3.7)
Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\miso\anaconda3\lib\site-packages (from requests->folium) (2.3.0)
Requirement already satisfied: certifi>=2017.4.17 in c:\users\miso\anaconda3\lib\site-packages (from requests->folium) (2025.7.14)

# Build the heat layer from the per-coordinate counts, attach it to the map,
# and display the map.
heat_layer = HeatMap(rush, zoom=20, radius=15)
heat_layer.add_to(basemap)
basemap

# =======================================================
# 🗺️ Allgemeine Funktion zur Erstellung von Heatmaps je Wochentag
# - Erstellt interaktive Karten für beliebige Tage
# =======================================================

def plot(df, day):
    """Build a folium map with a heat layer for the rows of one weekday.

    NOTE: this definition rebinds the name ``plot`` that was imported from
    ``plotly.offline`` earlier in the file.

    Args:
        df: DataFrame providing the 'Weekday', 'Lat' and 'Lon' columns.
        day: Value compared against the 'Weekday' column (e.g. 'Saturday').

    Returns:
        The ``folium.Map`` with the ``HeatMap`` layer added to it.
    """
    # Rows of the requested weekday, counted per distinct (Lat, Lon) pair.
    day_rows = df[df['Weekday'] == day]
    counts = day_rows.groupby(['Lat', 'Lon'])['Weekday'].count().reset_index()

    day_map = folium.Map()
    HeatMap(counts, zoom=20, radius=15).add_to(day_map)
    return day_map

plot(df, 'Saturday')

# Load the 'janjune-15' sample file into its own DataFrame.
uber_15=pd.read_csv(r'C:\Users\Miso\Desktop\Data Analysis 1\UberDataSet\uber-raw-data-janjune-15_sample.csv')

	Date/Time	Lat	Lon	Base
0	9/1/2014 0:01:00	40.2201	-74.0021	B02512
1	9/1/2014 0:01:00	40.7500	-74.0027	B02512
2	9/1/2014 0:03:00	40.7559	-73.9864	B02512
3	9/1/2014 0:06:00	40.7450	-73.9889	B02512
4	9/1/2014 0:11:00	40.8145	-73.9444	B02512

	Date/Time	Lat	Lon	Base
0	2014-09-01 00:01:00	40.2201	-74.0021	B02512
1	2014-09-01 00:01:00	40.7500	-74.0027	B02512
2	2014-09-01 00:03:00	40.7559	-73.9864	B02512
3	2014-09-01 00:06:00	40.7450	-73.9889	B02512
4	2014-09-01 00:11:00	40.8145	-73.9444	B02512

	Date/Time	Lat	Lon	Base	Weekday	Month	Day	Minute
0	2014-09-01 00:01:00	40.2201	-74.0021	B02512	Monday	9	1	1
1	2014-09-01 00:01:00	40.7500	-74.0027	B02512	Monday	9	1	1
2	2014-09-01 00:03:00	40.7559	-73.9864	B02512	Monday	9	1	3
3	2014-09-01 00:06:00	40.7450	-73.9889	B02512	Monday	9	1	6
4	2014-09-01 00:11:00	40.8145	-73.9444	B02512	Monday	9	1	11

	Base	Month	Date/Time
0	B02512	4	35536
1	B02512	5	36765
2	B02512	6	32509
3	B02512	7	35021
4	B02512	8	31472
5	B02512	9	34370
6	B02598	4	183263
7	B02598	5	260549
8	B02598	6	242975
9	B02598	7	245597
10	B02598	8	220129
11	B02598	9	240600
12	B02617	4	108001
13	B02617	5	122734
14	B02617	6	184460
15	B02617	7	310160
16	B02617	8	355803
17	B02617	9	377695
18	B02682	4	227808
19	B02682	5	222883
20	B02682	6	194926
21	B02682	7	196754
22	B02682	8	173280
23	B02682	9	197138
24	B02764	4	9908
25	B02764	5	9504
26	B02764	6	8974
27	B02764	7	8589
28	B02764	8	48591
29	B02764	9	178333

Hour	0	1	2	3	4	5	6	7	8	9	...	14	15	16	17	18	19	20	21	22	23
Weekday
Friday	13716	8163	5350	6930	8806	13450	23412	32061	31509	25230	...	36206	43673	48169	51961	54762	49595	43542	48323	49409	41260
Monday	6436	3737	2938	6232	9640	15032	23746	31159	29265	22197	...	28157	32744	38770	42023	37000	34159	32849	28925	20158	11811
Saturday	27633	19189	12710	9542	6846	7084	8579	11014	14411	17669	...	31418	38769	43512	42844	45883	41098	38714	43826	47951	43174
Sunday	32877	23015	15436	10597	6374	6169	6596	8728	12128	16401	...	28151	31112	33038	31521	28291	25948	25076	23967	19566	12166
Thursday	9293	5290	3719	5637	8505	14169	27065	37038	35431	27812	...	36699	44442	50560	56704	55825	51907	51990	51953	44194	27764
Tuesday	6237	3509	2571	4494	7548	14241	26872	36599	33934	25023	...	34846	41338	48667	55500	50186	44789	44661	39913	27712	14869
Wednesday	7644	4324	3141	4855	7511	13794	26943	36495	33826	25635	...	35148	43388	50684	55637	52732	47017	47772	44553	32868	18146

	Lat	Lon	Number of Trips
0	39.9374	-74.0722	1
1	39.9378	-74.0721	1
2	39.9384	-74.0742	1
3	39.9385	-74.0734	1
4	39.9415	-74.0736	1
...	...	...	...
209225	41.3141	-74.1249	1
209226	41.3180	-74.1298	1
209227	41.3195	-73.6905	1
209228	41.3197	-73.6903	1
209229	42.1166	-72.0666	1