import numpy as np
from collections import defaultdict
import concurrent.futures

class LinearRegression:
    """Fit a straight line ``y = a + b * x`` through averaged standards.

    ``data`` is an iterable of dicts, each with the keys ``'identifier'``,
    ``'coordinates'``, ``'y'`` (list of readings) and optionally ``'x'``
    (list of known x values).  Entries whose identifier starts with ``'S'``
    are treated as standards and are averaged per identifier before fitting;
    entries whose identifier is exactly ``'B'`` are treated as blanks.

    Args:
        data: The entries described above.
        weighted: When true, each averaged standard is weighted by
            ``1 / y**2``.
        forced_to_zero: When true, the intercept ``a`` is fixed at 0.

    Raises:
        ValueError: If an entry's ``x`` and ``y`` lengths cannot be aligned,
            or if the fit itself fails.
    """

    def __init__(self, data, weighted=False, forced_to_zero=True):
        self.data = data
        self.weighted = weighted
        self.forced_to_zero = forced_to_zero
        self._process_and_get_all_values()
        self._valid_points()
        self.a, self.b = self._calculate_a_b()

    @staticmethod
    def _align_x_to_y(identifier, x, y):
        """Return one x value per reading in ``y``.

        A missing/empty ``x`` becomes a run of 0 placeholders, a single x is
        repeated for every replicate reading, and equal lengths pass through.
        Any other combination is ambiguous and raises ``ValueError``.
        """
        if not x:
            return [0] * len(y)
        if len(x) == len(y):
            return list(x)
        if len(x) == 1:
            return list(x) * len(y)
        raise ValueError(
            f"Entry {identifier!r} has {len(x)} x values for {len(y)} y values"
        )

    def _process_and_get_all_values(self):
        """Group the raw entries and build the flat per-reading lists.

        Sets ``averages_standards``, ``avg_y_blanks``, ``x``, ``y``,
        ``blank_y``, ``blank_values`` and the parallel ``all_*`` lists (one
        item per individual reading, in input order).
        """
        # identifier -> (x values, y readings); one dict keeps both in step.
        standards = defaultdict(lambda: ([], []))
        blank_readings = []
        all_y_values, all_identifiers, all_wells, all_x_values = [], [], [], []

        for entry in self.data:
            identifier, y = entry['identifier'], entry['y']
            x = entry.get('x') or []
            all_y_values.extend(y)
            all_identifiers.extend([identifier] * len(y))
            all_wells.extend([entry['coordinates']] * len(y))
            # Exactly len(y) items, so every all_* list stays index-aligned.
            all_x_values.extend(self._align_x_to_y(identifier, x, y))

            if identifier.startswith('S'):
                xs, ys = standards[identifier]
                xs.extend(x)
                ys.extend(y)
            elif identifier == 'B':
                blank_readings.extend(y)

        self.averages_standards = {
            identifier: {'avg_x': np.mean(xs) if xs else None, 'avg_y': np.mean(ys)}
            for identifier, (xs, ys) in standards.items()
        }
        self.avg_y_blanks = np.mean(blank_readings) if blank_readings else None
        # NOTE: x and y keep their historical definitions (x skips standards
        # without an x, y does not), so they are not guaranteed to be the same
        # length.  The fit uses the paired arrays built in _valid_points.
        self.x = [v['avg_x'] for v in self.averages_standards.values() if v['avg_x'] is not None]
        self.y = [v['avg_y'] for v in self.averages_standards.values()]
        self.blank_y = [self.avg_y_blanks] if self.avg_y_blanks is not None else []
        # Raw blank readings; needed to estimate the blank standard deviation.
        self.blank_values = blank_readings

        self.all_identifiers, self.all_wells, self.all_x_values, self.all_y_values = (
            all_identifiers, all_wells, all_x_values, all_y_values
        )

    def _valid_points(self):
        """Build the paired ``x_values`` / ``y_values`` / ``weights`` arrays.

        A standard is used only when both its mean x and its mean y are
        present and non-zero.  Filtering the pair as a unit keeps the three
        arrays the same length and in the same order.
        """
        try:
            pairs = [
                (float(v['avg_x']), v['avg_y'])
                for v in self.averages_standards.values()
                if v['avg_x'] and v['avg_y']
            ]
            self.x_values = np.array([x for x, _ in pairs])
            self.y_values = np.array([y for _, y in pairs])
            self.weights = np.array([1 / (y ** 2) if self.weighted else 1 for _, y in pairs])
        except ValueError as e:
            raise ValueError(f"Error converting points to float: {e}") from e

    def _calculate_a_b(self):
        """Dispatch to the fit selected by ``forced_to_zero`` and ``weighted``.

        Returns:
            The ``(a, b)`` pair: intercept and slope.

        Raises:
            ValueError: Wrapping any exception raised by the selected fit.
        """
        try:
            if self.forced_to_zero:
                return self._calculate_a_b_forced_to_zero()
            return self._calculate_a_b_weighted() if self.weighted else self._calculate_a_b_unweighted()
        except Exception as e:
            raise ValueError(f"Error calculating a and b: {e}") from e

    def _calculate_a_b_unweighted(self):
        """Ordinary least-squares intercept and slope."""
        x_mean, y_mean = np.mean(self.x_values), np.mean(self.y_values)
        b = np.sum((self.x_values - x_mean) * (self.y_values - y_mean)) / np.sum((self.x_values - x_mean) ** 2)
        a = y_mean - (b * x_mean)
        return a, b

    def _calculate_a_b_weighted(self):
        """Weighted least-squares intercept and slope using ``self.weights``.

        The slope falls back to 0 when the weighted x variance is 0.
        """
        x_mean_w = np.average(self.x_values, weights=self.weights)
        y_mean_w = np.average(self.y_values, weights=self.weights)
        b_num = np.sum(self.weights * (self.x_values - x_mean_w) * (self.y_values - y_mean_w))
        b_den = np.sum(self.weights * (self.x_values - x_mean_w) ** 2)
        b = b_num / b_den if b_den != 0 else 0
        a = y_mean_w - (b * x_mean_w)
        return a, b

    def _calculate_a_b_forced_to_zero(self):
        """Slope of the line through the origin; the intercept is returned as 0."""
        if self.weighted:
            numerator = np.sum(self.weights * self.x_values * self.y_values)
            denominator = np.sum(self.weights * self.x_values ** 2)
        else:
            numerator = np.sum(self.x_values * self.y_values)
            denominator = np.sum(self.x_values ** 2)

        b = numerator / denominator
        return 0, b

    def _pred_unweighted(self, x_values):
        """Evaluate the fitted line at ``x_values``."""
        return self.a + (self.b * x_values)

    def _calculate_metrics(self):
        """Return ``(r_squared, adjusted_r_squared, rss)`` for the fit.

        Points sharing the same x are averaged first.  The adjusted value uses
        the factor ``(n - 1) / (n - 2)`` and equals ``r_squared`` when there
        are two or fewer distinct x values.

        Raises:
            ValueError: Wrapping any exception raised during the calculation.
        """
        try:
            x_unique = np.unique(self.x_values)
            y_avg = np.array([np.mean(self.y_values[self.x_values == xi]) for xi in x_unique])
            y_pred = self._pred_unweighted(x_unique)
            rss = np.sum((y_avg - y_pred) ** 2)
            tss = np.sum((y_avg - np.mean(y_avg)) ** 2)
            r_squared = 1 - (rss / tss)
            n = len(y_avg)
            adjusted_r_squared = 1 - ((1 - r_squared) * ((n - 1) / (n - 2))) if n > 2 else r_squared
            return r_squared, adjusted_r_squared, rss
        except Exception as e:
            raise ValueError(f"Error calculating metrics: {e}") from e

    def _calculate_lld(self):
        """Return the x value at ``mean_blank +/- 2 * 1.645 * std_blank``.

        The blank mean and sample standard deviation (``ddof=1``) come from
        the individual blank readings; with a single reading the standard
        deviation is taken as 0.  The offset is added when the fitted line
        rises over the standards' x range and subtracted otherwise.
        Returns ``None`` when there are no blanks.

        Raises:
            ValueError: Wrapping any exception raised during the calculation.
        """
        try:
            if not self.blank_values:
                return None
            mean_blank = np.mean(self.blank_values)
            std_blank = np.std(self.blank_values, ddof=1) if len(self.blank_values) > 1 else 0
            # NOTE(review): 1.645 is presumably the one-sided 95% normal
            # quantile - confirm against the assay specification.
            margin = 2 * 1.645 * std_blank
            min_concentration, max_concentration = np.min(self.x_values), np.max(self.x_values)
            curvemin, curvemax = self.a + self.b * min_concentration, self.a + self.b * max_concentration
            if curvemax > curvemin:
                return (mean_blank + margin - self.a) / self.b
            return (mean_blank - margin - self.a) / self.b
        except Exception as e:
            raise ValueError(f"Error calculating LLD: {e}") from e

    def _calculate_cv(self):
        """Return the percent CV of back-calculated x values per standard.

        Each reading ``y`` of a standard is converted with ``(y - a) / b``.
        The result maps identifier to ``std(ddof=1) / mean * 100``, or to
        ``None`` when the standard has fewer than two readings.

        Raises:
            ValueError: Wrapping any exception raised during the calculation.
        """
        try:
            standard_concentrations = defaultdict(list)
            for entry in self.data:
                if entry['identifier'].startswith('S'):
                    standard_concentrations[entry['identifier']].extend((y - self.a) / self.b for y in entry['y'])
            return {
                identifier: (np.std(concentrations, ddof=1) / np.mean(concentrations) * 100) if len(concentrations) >= 2 else None
                for identifier, concentrations in standard_concentrations.items()
            }
        except Exception as e:
            raise ValueError(f"Error calculating CV: {e}") from e

    def _process_well(self, index, cvs):
        """Build the table row for the reading at ``index``.

        Args:
            index: Position in the flat ``all_*`` lists.
            cvs: Mapping of identifier to %CV, as returned by ``_calculate_cv``.

        Returns:
            A dict with the keys ``identifier``, ``well``,
            ``concentration_predicted``, ``%CV`` and
            ``concentration_difference`` (percent deviation from the known x,
            or ``None`` when the known x is 0).

        Raises:
            ValueError: Wrapping any exception raised while building the row.
        """
        try:
            fi_endpoint = self.all_y_values[index]
            concentration_predicted = (fi_endpoint - self.a) / self.b if self.a is not None and self.b is not None else None
            concentration_actual = self.all_x_values[index]
            concentration_difference = ((concentration_predicted - concentration_actual) / concentration_actual * 100) if concentration_actual != 0 else None
            return {
                "identifier": self.all_identifiers[index],
                "well": self.all_wells[index],
                "concentration_predicted": concentration_predicted,
                "%CV": cvs.get(self.all_identifiers[index], None),
                "concentration_difference": concentration_difference
            }
        except Exception as e:
            raise ValueError(f"Error processing well: {e}") from e

    def _generate_additional_table(self, cvs):
        """Return one ``_process_well`` row per reading, in input order.

        Raises:
            ValueError: Wrapping any exception raised while building the rows.
        """
        try:
            # Each row is a few arithmetic operations, so a plain loop is
            # cheaper than dispatching the work to a thread pool.
            return [self._process_well(index, cvs) for index in range(len(self.all_y_values))]
        except Exception as e:
            raise ValueError(f"Error generating additional table: {e}") from e

    def get_metrics(self):
        """Return the fit summary as a dict.

        Keys: ``method``, ``function`` (``{"a": ..., "b": ...}``), ``RSS``,
        ``R_Squared``, ``Adjusted_R_Squared``, ``LLD`` and
        ``Additional_Table_Details`` (the per-reading rows).

        Raises:
            ValueError: Wrapping any exception raised by the calculations.
        """
        try:
            r_squared, adjusted_r_squared, rss = self._calculate_metrics()
            cvs = self._calculate_cv()
            table_details = self._generate_additional_table(cvs)
            lld = self._calculate_lld()
            method_name = f"{'Weighted' if self.weighted else 'Unweighted'}{' Forced to Zero' if self.forced_to_zero else ''}"
            return {
                "method": method_name,
                "function": {"a": self.a, "b": self.b},
                "RSS": rss,
                "R_Squared": r_squared,
                "Adjusted_R_Squared": adjusted_r_squared,
                "LLD": lld,
                "Additional_Table_Details": table_details
            }
        except Exception as e:
            raise ValueError(f"Error getting metrics: {e}") from e

# Demo input, one row per well: (coordinates, identifier, known x or None, reading).
# Rows without a known x get an empty "x" list.
data = [
    {
        "coordinates": well,
        "identifier": label,
        "x": [] if known_x is None else [known_x],
        "y": [reading],
    }
    for well, label, known_x, reading in (
        ("A1", "S1", 25, 640),
        ("A2", "S1", 25, 664),
        ("A3", "U1", None, 390),
        ("A4", "U2", None, 250),
        ("B1", "S2", 50, 426),
        ("B2", "S2", 50, 436),
        ("B3", "C1", 20, 380),
        ("B4", "C2", 40, 220),
        ("C1", "S3", 100, 292),
        ("C2", "S3", 100, 290),
        ("C3", "B", None, 8),
        ("C4", "X", None, 10),
    )
]

# Fit the line to the demo input using the constructor defaults.
lr = LinearRegression(data)

# Compute the fit summary and show it.
metrics = lr.get_metrics()
print(metrics)
