This project explores passenger data from the Titanic disaster to understand what factors influenced survival.
The dataset includes information about each passenger such as their age, gender, ticket class, fare, and whether they survived or not.
Discover patterns in survival based on class, gender, age, and other factors, and understand who was more likely to survive.
import pandas as pd
import sqlite3
df = pd.read_csv("titanic.csv")
conn = sqlite3.connect("titanic.db")
df.to_sql("titanic", conn, if_exists="replace", index=False)
SELECT Sex, COUNT(*)
FROM titanic
GROUP BY Sex;
SELECT ROUND(AVG(Survived)*100,2)
FROM titanic;
SELECT Sex,
COUNT(*) AS total,
SUM(Survived) AS survived,
ROUND(AVG(Survived)*100,2) AS survival_rate
FROM titanic
GROUP BY Sex;
SELECT Pclass,
COUNT(*) AS total,
SUM(Survived) AS survived,
ROUND(AVG(Survived)*100,2) AS survival_rate
FROM titanic
GROUP BY Pclass;
SELECT Pclass,
ROUND(AVG(Fare),2) AS avg_fare
FROM titanic
GROUP BY Pclass;
SELECT Survived,
ROUND(AVG(Age),2) AS avg_age
FROM titanic
GROUP BY Survived;
SELECT Pclass,
Sex,
COUNT(*) AS total,
SUM(Survived) AS survived,
ROUND(AVG(Survived)*100,2) AS survival_rate
FROM titanic
GROUP BY Pclass, Sex;
SELECT *
FROM titanic
WHERE Fare > (SELECT AVG(Fare) FROM titanic)
AND Survived = 1;
SELECT *
FROM titanic
ORDER BY Fare DESC
LIMIT 5;
SELECT
CASE
WHEN Fare < 10 THEN '0-10'
WHEN Fare < 50 THEN '10-50'
WHEN Fare < 100 THEN '50-100'
ELSE '100+'
END AS fare_range,
COUNT(*) AS total,
SUM(Survived) AS survived,
ROUND(AVG(Survived)*100,2) AS survival_rate
FROM titanic
GROUP BY fare_range;
SELECT (SibSp + Parch) AS family_size,
COUNT(*) AS total,
SUM(Survived) AS survived,
ROUND(AVG(Survived)*100,2) AS survival_rate
FROM titanic
GROUP BY family_size;
SELECT
CASE
WHEN (SibSp + Parch) = 0 THEN 'Alone'
ELSE 'Family'
END AS travel_type,
COUNT(*) AS total,
SUM(Survived) AS survived,
ROUND(AVG(Survived)*100,2) AS survival_rate
FROM titanic
GROUP BY travel_type;
SELECT
CASE
WHEN (SibSp + Parch) = 0 THEN 'Alone'
WHEN (SibSp + Parch) <= 3 THEN 'Small'
ELSE 'Large'
END AS family_bucket,
COUNT(*) AS total,
SUM(Survived) AS survived,
ROUND(AVG(Survived)*100,2) AS survival_rate
FROM titanic
GROUP BY family_bucket;
SELECT Embarked,
COUNT(*) AS total,
SUM(Survived) AS survived,
ROUND(AVG(Survived)*100,2) AS survival_rate
FROM titanic
GROUP BY Embarked;
SELECT *
FROM titanic
WHERE Survived = 1
AND Fare > (
SELECT AVG(Fare)
FROM titanic t2
WHERE t2.Pclass = titanic.Pclass
)
LIMIT 10;
Summary view of survival patterns across gender, class, and fare distribution.