Predicting Euro 2024 Playoff Results and Winner with Machine Learning and GPT 4.0 Chat (Part 2)

Disclaimer

The following predictions are not worth betting on: they do not take into account the bookmaker's margin, the current form of the teams, or many other factors. Sports betting is a very specific activity that depends heavily on human psychology, hidden weaknesses, and the like, so you should not place bets based on predictions found on the Internet.

Group stage prediction results:

  • Correct Predictions (+): 15

  • Incorrect Predictions (-): 21

  • Percentage of Correct Predictions: 41.67%

  • Sum of Odds (Coefficients) for Correct Predictions: 32.2

The share of correct predictions was 16 percent lower than initially stated. The hit rate was quite high in the first two rounds, but it dropped significantly in the third round, where qualification from the group was no longer at stake for many teams.

If we had bet $100 on each of the 36 matches, we would have lost approximately $380 ($3,600 staked against $3,220 returned).

Playoff Prediction:

library(randomForest)
library(dplyr)

# Load the historical match results.
data <- read.csv("filtered_results.csv")

# Parse the date column into Date objects.
data$date <- as.Date(data$date, format = "%Y-%m-%d")

# The target is binary (home win vs. away win), so draws must not reach the
# training data: left in, a draw is labelled 0 and is then counted as a *win*
# for the away side once the label is flipped for `away_games` below.
# Rows with a missing score are dropped for the same reason: they have no
# decisive result. Both conditions are no-ops if the CSV is already clean.
data <- data %>%
  dplyr::filter(
    !is.na(home_score),
    !is.na(away_score),
    home_score != away_score
  )

# Target from the home side's perspective: 1 = home win, 0 = away win.
data$result <- ifelse(data$home_score > data$away_score, 1, 0)

# Reshape to one row per team per match so that home and away appearances
# can be aggregated together.
home_games <- data %>%
  select(
    team = home_team, opponent = away_team,
    score = home_score, opponent_score = away_score, result
  )

away_games <- data %>%
  select(
    team = away_team, opponent = home_team,
    score = away_score, opponent_score = home_score, result
  ) %>%
  # `result` is stored from the home side's view; invert it for the away side.
  mutate(result = 1 - result)

all_games <- bind_rows(home_games, away_games)

# Per-team aggregates over every decisive match in the data set:
# games played, share of games won, and mean goals scored.
team_stats <- all_games %>%
  group_by(team) %>%
  summarise(
    total_games = n(),
    total_win_rate = mean(result == 1),
    total_avg_score = mean(score)
  )

# Attach each side's aggregate statistics to every match. The stat columns
# are prefixed before joining, so the home and away joins cannot collide on
# column names.
data <- data %>%
  left_join(
    rename_with(team_stats, ~ paste0("home_team_", .x), .cols = -team),
    by = c("home_team" = "team")
  ) %>%
  left_join(
    rename_with(team_stats, ~ paste0("away_team_", .x), .cols = -team),
    by = c("away_team" = "team")
  )

# Replace every remaining NA in the frame with 0.
data <- replace(data, is.na(data), 0)

# Model inputs: the six per-team aggregates; target: the match result as a
# factor so that randomForest performs classification.
features <- c(
  "home_team_total_win_rate", "away_team_total_win_rate",
  "home_team_total_games", "away_team_total_games",
  "home_team_total_avg_score", "away_team_total_avg_score"
)
X <- data[, features]
y <- factor(data[["result"]])

# Hold out 20% of the matches for evaluation; the seed makes the split
# reproducible.
# NOTE(review): `team_stats` is computed from all rows before this split, so
# the features of the held-out matches already reflect their own results.
# The accuracy printed below is therefore likely optimistic.
set.seed(42)
train_indices <- sample(
  seq_len(nrow(data)),
  size = 0.8 * nrow(data)
)

X_train <- X[train_indices, , drop = FALSE]
y_train <- y[train_indices]

X_test <- X[-train_indices, , drop = FALSE]
y_test <- y[-train_indices]

# Fit the random forest classifier on the training portion.
rf_model <- randomForest(
  x = X_train,
  y = y_train,
  ntree = 200,
  mtry = 3,
  importance = TRUE
)

# Share of held-out matches whose predicted label equals the true label.
y_pred <- predict(rf_model, X_test)
accuracy <- sum(y_pred == y_test) / length(y_test)
print(paste("Accuracy:", accuracy))

#' Predict the outcome of a set of fixtures.
#'
#' Looks up both sides' aggregate statistics in `stats`, assembles the model
#' feature columns and classifies every fixture.
#'
#' @param matches Data frame with columns `home_team` and `away_team`.
#' @param model Fitted classifier; `predict(model, newdata)` must return one
#'   label per row, where the label `1` means a home win.
#' @param stats Data frame with columns `team`, `total_games`,
#'   `total_win_rate` and `total_avg_score` (one row per team).
#' @param feature_names Names (and order) of the feature columns handed to
#'   the model. Defaults to the six columns the model in this script is
#'   trained on, so the function no longer depends on a global `features`.
#' @return Character vector of `"Home Win"` / `"Away Win"`, one element per
#'   row of `matches`.
predict_matches <- function(matches, model, stats,
                            feature_names = c(
                              "home_team_total_win_rate",
                              "away_team_total_win_rate",
                              "home_team_total_games",
                              "away_team_total_games",
                              "home_team_total_avg_score",
                              "away_team_total_avg_score"
                            )) {
  stat_cols <- c("total_win_rate", "total_games", "total_avg_score")

  # Row of `stats` for each side of each fixture; NA when the team is unknown.
  home_idx <- match(matches$home_team, stats$team)
  away_idx <- match(matches$away_team, stats$team)

  # Unknown teams are still predicted (with all-zero statistics, as before),
  # but no longer silently: a misspelled team name would otherwise go
  # unnoticed.
  unknown <- unique(c(
    as.character(matches$home_team[is.na(home_idx)]),
    as.character(matches$away_team[is.na(away_idx)])
  ))
  if (length(unknown) > 0) {
    warning(
      "No statistics for team(s): ", paste(unknown, collapse = ", "),
      "; using 0 for their features.",
      call. = FALSE
    )
  }

  stats <- as.data.frame(stats)
  home_stats <- stats[home_idx, stat_cols, drop = FALSE]
  away_stats <- stats[away_idx, stat_cols, drop = FALSE]
  names(home_stats) <- paste0("home_team_", stat_cols)
  names(away_stats) <- paste0("away_team_", stat_cols)

  model_input <- cbind(home_stats, away_stats)
  rownames(model_input) <- NULL
  # Only the numeric stat columns are zero-filled; team names are untouched.
  model_input[is.na(model_input)] <- 0

  predictions <- predict(model, model_input[feature_names])
  ifelse(predictions == 1, "Home Win", "Away Win")
}

# Winner of each fixture given its predicted outcome label.
# `fixtures` has columns home_team / away_team; `outcomes` is the parallel
# vector of "Home Win" / "Away Win" labels returned by predict_matches().
pick_winners <- function(fixtures, outcomes) {
  unname(ifelse(
    outcomes == "Home Win",
    as.character(fixtures$home_team),
    as.character(fixtures$away_team)
  ))
}

# Round of 16
round_of_16 <- data.frame(
  home_team = c("Switzerland", "Germany", "England", "Spain",
                "France", "Portugal", "Romania", "Austria"),
  away_team = c("Italy", "Denmark", "Slovakia", "Georgia",
                "Belgium", "Slovenia", "Netherlands", "Turkey")
)

round_of_16_results <- predict_matches(round_of_16, rf_model, team_stats)
round_of_16

# Quarterfinals
# Pairing is positional: the winner of round-of-16 match i (i = 1..4) hosts
# the winner of match i + 4.
# NOTE(review): this may not match the official tournament bracket; confirm
# the pairings before relying on the later rounds.
r16_winners <- pick_winners(round_of_16, round_of_16_results)
quarterfinals <- data.frame(
  home_team = r16_winners[1:4],
  away_team = r16_winners[5:8]
)

quarterfinal_results <- predict_matches(quarterfinals, rf_model, team_stats)
quarterfinals

# Semifinals: winners of quarterfinals 1 and 2 host the winners of
# quarterfinals 3 and 4 respectively.
qf_winners <- pick_winners(quarterfinals, quarterfinal_results)
semifinals <- data.frame(
  home_team = qf_winners[1:2],
  away_team = qf_winners[3:4]
)

semifinal_results <- predict_matches(semifinals, rf_model, team_stats)
semifinals

# Final: winner of semifinal 1 vs. winner of semifinal 2.
sf_winners <- pick_winners(semifinals, semifinal_results)
final <- data.frame(
  home_team = sf_winners[1],
  away_team = sf_winners[2]
)

final_result <- predict_matches(final, rf_model, team_stats)
final

# Print a round header followed by one "<home> vs <away> -> Prediction: ..."
# line per fixture. `fixtures` has columns home_team / away_team and
# `outcomes` is the parallel vector of predicted labels.
# seq_len() keeps the loop from running when a round has no fixtures
# (1:nrow(df) would iterate over c(1, 0) for an empty frame).
print_round <- function(header, fixtures, outcomes) {
  print(header)
  for (i in seq_len(nrow(fixtures))) {
    print(paste(
      fixtures$home_team[i], "vs", fixtures$away_team[i],
      "-> Prediction:", outcomes[i]
    ))
  }
  invisible(NULL)
}

# Output the predictions round by round.
print_round("1/8 финала результаты:", round_of_16, round_of_16_results)
print_round("1/4 финала результаты:", quarterfinals, quarterfinal_results)
print_round("1/2 финала результаты:", semifinals, semifinal_results)
print_round("Финал результат:", final, final_result)

1/8 finals:

[1] “Switzerland vs Italy -> Prediction: Away Win”

[1] “Germany vs Denmark -> Prediction: Away Win”

[1] “England vs Slovakia -> Prediction: Home Win”

[1] “Spain vs Georgia -> Prediction: Home Win”

[1] “France vs Belgium -> Prediction: Home Win”

[1] “Portugal vs Slovenia -> Prediction: Home Win”

[1] “Romania vs Netherlands -> Prediction: Away Win”

[1] “Austria vs Turkey -> Prediction: Away Win”

The accuracy has increased – “Accuracy: 0.724324324324324”. This is expected, since a knockout match has only two possible outcomes.

Next, I will give a forecast in the comments depending on the actual pairings.

Predicted final: “England vs Spain -> Prediction: Away Win”

Spain was again predicted to win the tournament; the odds on its victory have shortened from 9.00 to 5.50.

Similar Posts

Leave a Reply

Your email address will not be published. Required fields are marked *