#include "ftrl.h"
#include "log/log.h"

// Follow-the-regularized leader (FTRL) Proximal

ftrl_trainer_t *ftrl_trainer_new(size_t m, size_t n, bool fit_intercept, double alpha, double beta, double lambda1, double lambda2) {
    ftrl_trainer_t *trainer = malloc(sizeof(ftrl_trainer_t));
    if (trainer == NULL) return NULL;

    trainer->z = double_matrix_new_zeros(m, n);
    if (trainer->z == NULL) {
        goto exit_ftrl_trainer_created;
    }

    trainer->fit_intercept = fit_intercept;
    trainer->alpha = alpha;
    trainer->beta = beta;
    trainer->lambda1 = lambda1;
    trainer->lambda2 = lambda2;

    trainer->num_features = m;

    trainer->learning_rates = double_array_new_zeros(m);
    if (trainer->learning_rates == NULL) {
        goto exit_ftrl_trainer_created;
    }

    return trainer;
exit_ftrl_trainer_created:
    ftrl_trainer_destroy(trainer);
    return NULL;
}

bool ftrl_trainer_reset_params(ftrl_trainer_t *self, double alpha, double beta, double lambda1, double lambda2) {
    double_matrix_zero(self->z);
    double_array_zero(self->learning_rates->a, self->learning_rates->n);

    self->alpha = alpha;
    self->beta = beta;
    self->lambda1 = lambda1;
    self->lambda2 = lambda2;
    return true;
}


bool ftrl_trainer_extend(ftrl_trainer_t *self, size_t m) {
    if (self == NULL || self->z == NULL || self->learning_rates == NULL) return false;

    if (!double_matrix_resize_fill_zeros(self->z, m, self->z->n) ||
        !double_array_resize_fill_zeros(self->learning_rates, m)) {
        return false;
    }

    self->num_features = m;

    return true;
}

bool ftrl_set_weights(ftrl_trainer_t *self, double_matrix_t *w, uint32_array *indices) {
    if (self == NULL || w == NULL) return false;

    size_t m = self->z->m;
    size_t n = self->z->n;

    size_t num_indices = m;

    if (indices != NULL) {
        num_indices = indices->n;
    }

    if (!double_matrix_resize(w, num_indices, n)) {
        log_error("Resizing weights failed\n");
        return false;
    }

    double lambda1 = self->lambda1;
    double lambda2 = self->lambda2;

    double_matrix_t *z = self->z;
    double *learning_rates = self->learning_rates->a;

    double alpha = self->alpha;
    double beta = self->beta;

    uint32_t *row_indices = NULL;

    if (indices != NULL) {
        row_indices = indices->a;
    }

    uint32_t row_idx;
    size_t i_start = self->fit_intercept ? 1 : 0;

    for (size_t i = 0; i < num_indices; i++) {
        if (indices != NULL) {
            row_idx = row_indices[i];            
        } else {
            row_idx = i;
        }

        double *row = double_matrix_get_row(z, (size_t)row_idx);
        double lr = learning_rates[row_idx];
        double *weights_row = double_matrix_get_row(w, i);

        if (row_idx >= i_start) {
            for (size_t j = 0; j < n; j++) {
                double z_ij = row[j];
                double sign_z_ij = sign(z_ij);
                if (sign_z_ij * z_ij > lambda1) {
                    double w_ij = -(1.0/(((beta + sqrt(lr)) / alpha) + lambda2)) * (z_ij - sign_z_ij * lambda1);
                    weights_row[j] = w_ij;
                } else {
                    weights_row[j] = 0.0;
                }
            }
        } else {
            for (size_t j = 0; j < n; j++) {
                double z_ij = row[j];
                double w_ij = -(1.0/((beta + sqrt(lr)) / alpha)) * z_ij;
                weights_row[j] = w_ij;
            }
        }
    }

    return true;
}

bool ftrl_update_gradient(ftrl_trainer_t *self, double_matrix_t *gradient, double_matrix_t *weights, uint32_array *indices, size_t batch_size) {
    if (self == NULL || indices == NULL || gradient == NULL || gradient->m != weights->m || gradient->n != weights->n) {
        if (indices == NULL) {
            log_error("indices was NULL\n");
        }
        log_error("gradient->m = %zu, gradient->n = %zu, weights->m = %zu, weights->n = %zu\n", gradient->m, gradient->n, weights->m, weights->n);
        return false;
    }

    size_t m = self->z->m;
    size_t n = self->z->n;

    size_t num_indices = indices->n;
    uint32_t *row_indices = indices->a;

    double_matrix_t *z = self->z;

    double *learning_rates = self->learning_rates->a;

    double alpha = self->alpha;

    for (size_t i = 0; i < num_indices; i++) {
        uint32_t row_idx = row_indices[i];
        if (row_idx >= m) {
            log_error("row_idx = %u, m = %zu\n", row_idx, m);
            return false;
        }
        double lr = learning_rates[row_idx];

        double *weights_row = double_matrix_get_row(weights, i);
        double *gradient_row = double_matrix_get_row(gradient, i);
        double *z_row = double_matrix_get_row(z, row_idx);

        double lr_update = lr;

        for (size_t j = 0; j < n; j++) {
            double grad_ij = gradient_row[j];
            lr_update += grad_ij * grad_ij;
        }

        double sigma = (1.0 / (alpha * batch_size)) * (sqrt(lr_update) - sqrt(lr));

        for (size_t j = 0; j < n; j++) {
            double z_ij_update = gradient_row[j] - sigma * weights_row[j];
            z_row[j] += z_ij_update;
        }

        learning_rates[row_idx] = lr_update;
    }

    return true;
}

double ftrl_reg_cost(ftrl_trainer_t *self, double_matrix_t *theta, uint32_array *update_indices, size_t batch_size) {
    double cost = 0.0;

    size_t m = theta->m;
    size_t n = theta->n;

    uint32_t *indices = NULL;
    size_t num_indices = m;

    if (update_indices != NULL) {
        uint32_t *indices = update_indices->a;
        size_t num_indices = update_indices->n;
    }
    size_t i_start = self->fit_intercept ? 1 : 0;

    double lambda1 = self->lambda1;
    double lambda2 = self->lambda2;

    double l2_cost = 0.0;
    double l1_cost = 0.0;

    for (size_t i = 0; i < m; i++) {
        uint32_t row_idx = i;
        if (indices != NULL) {
            row_idx = indices[i];
        }

        if (row_idx >= i_start) {
            double *theta_i = double_matrix_get_row(theta, i);

            l2_cost += double_array_l2_norm(theta_i, n);
            l1_cost += double_array_l1_norm(theta_i, n);
        }
    }

    cost += lambda2 / 2.0 * l2_cost;
    cost += lambda1 * l1_cost;

    return cost * 1.0 / (double)batch_size;
}


double_matrix_t *ftrl_weights_finalize(ftrl_trainer_t *self) {
    if (!ftrl_set_weights(self, self->z, NULL)) {
        return NULL;
    }

    double_matrix_t *weights = self->z;
    self->z = NULL;
    return weights;
}


sparse_matrix_t *ftrl_weights_finalize_sparse(ftrl_trainer_t *self) {
    size_t m = self->z->m;
    size_t n = self->z->n;

    double *learning_rates = self->learning_rates->a;

    double alpha = self->alpha;
    double beta = self->beta;
    double lambda1 = self->lambda1;
    double lambda2 = self->lambda2;

    sparse_matrix_t *weights = sparse_matrix_new();
    log_info("weights->m = %" PRIu32 "\n", weights->m);

    size_t i_start = 0;

    if (self->fit_intercept) {
        double *row = double_matrix_get_row(self->z, 0);
        double lr = learning_rates[0];

        for (size_t j = 0; j < n; j++) {
            double z_ij = row[j];
            double w_ij = -(1.0/((beta + sqrt(lr)) / alpha)) * z_ij;
            sparse_matrix_append(weights, j, w_ij);
        }
        sparse_matrix_finalize_row(weights);
        i_start = 1;
    }
    log_info("after intercept weights->m = %" PRIu32 "\n", weights->m);

    for (size_t i = i_start; i < m; i++) {
        double *row = double_matrix_get_row(self->z, (size_t)i);
        double lr = learning_rates[i];
        for (size_t j = 0; j < n; j++) {
            double z_ij = row[j];
            double sign_z_ij = sign(z_ij);
            if (sign_z_ij * z_ij > lambda1) {
                double w_ij = -(1.0/(((beta + sqrt(lr)) / alpha) + lambda2)) * (z_ij - sign_z_ij * lambda1);
                sparse_matrix_append(weights, j, w_ij);
            }
        }
        sparse_matrix_finalize_row(weights);

        if (i % 1000 == 0 && i > 0) {
            log_info("adding rows, weights->m = %" PRIu32 "\n", weights->m);
        }
    }

    return weights;
}


void ftrl_trainer_destroy(ftrl_trainer_t *self) {
    if (self == NULL) return;

    if (self->z != NULL) {
        double_matrix_destroy(self->z);
    }

    if (self->learning_rates != NULL) {
        double_array_destroy(self->learning_rates);
    }

    free(self);
}