Source code for alproj.optimize

import numpy as np
import warnings
from math import pi, sin, cos, tan, atan
import pandas as pd
from cmaes import CMA
from scipy.optimize import least_squares
from tqdm import tqdm

[docs] def intrinsic_mat(fov_x_deg, w, h, cx=None, cy=None): """ Makes an intrinsic camera matrix (in OpenCV style) from given parameters. See https://learnopencv.com/geometry-of-image-formation/ . Parameters ---------- fov_x_deg : float Field of View in degree. w : int Width of the image h : int Height of the image cx : float default None X-coordinate of the principal point. If None, cx = w/2. cy : float default None Y-coordinate of the principal point. If None, cy = h/2. Returns ------- mat : numpy.ndarray Intrinsic matrix. """ if cx == None: cx = w/2 if cy == None: cy = h/2 fov_x = fov_x_deg * pi / 180 fov_y = 2 * atan(tan(fov_x / 2) * h / w) fx = w / (2 * tan(fov_x/2)) fy = h / (2 * tan(fov_y/2)) mat = np.array([ [fx, 0, cx], [0, fy, cy], [0, 0, 1] ]) return mat
[docs] def extrinsic_mat(pan_deg, tilt_deg, roll_deg, t_x, t_y, t_z): """ Makes an extrinsic camera matrix from given parameters. See https://learnopencv.com/geometry-of-image-formation/ . Parameters ---------- pan_deg : float Pan angle in degree. tilt_deg : float Tilt angle in degree. roll_deg : float Roll angle in degree. t_x : float X-axis coordinate of the camera location. t_y : float Y-axis coordinate of the camera location. t_z : float Z-axis coordinate of the camera location. Returns ------- mat : numpy.ndarray Extrinsic camera matrix. """ pan = pan_deg * pi / 180 tilt = -(tilt_deg + 90) * pi / 180 roll = -roll_deg * pi / 180 rmat_z = np.array([ [cos(pan), -sin(pan), 0], [sin(pan), cos(pan), 0], [0, 0, 1] ]) rmat_x = np.array([ [1, 0, 0], [0, cos(tilt), -sin(tilt)], [0, sin(tilt), cos(tilt)] ]) rmat_y = np.array([ [cos(roll), 0, sin(roll)], [0, 1, 0], [-sin(roll), 0, cos(roll)] ]) rmat = np.dot(np.dot(rmat_x, rmat_y), rmat_z) tmat = np.array([ [-t_x], [-t_y], [-t_z] ]) tmat = np.dot(rmat, tmat) return np.vstack((np.hstack((rmat, tmat)), np.array([0,0,0,1])))
def _distort(points, w, h, a1, a2, k1, k2, k3, k4, k5, k6, p1, p2, s1, s2, s3, s4, cx=None, cy=None): """ Apply lens distortion to 2D points using the camera's distortion model. For detailed parameter descriptions, see alproj.project.persp_proj(). Parameters ---------- cx : float, default None X-coordinate of the principal point used as distortion center. If None, defaults to (w - 1) / 2 for backward compatibility. cy : float, default None Y-coordinate of the principal point used as distortion center. If None, defaults to (h - 1) / 2 for backward compatibility. """ if cx is None: cx = (w - 1) / 2 if cy is None: cy = (h - 1) / 2 centre = np.array([cx, cy], dtype='float32') x1 = (points[:,0] - centre[0]) / centre[0] y1 = (points[:,1] - centre[1]) / centre[1] r = (x1**2 + y1**2)**0.5 r2 = r**2 r4 = r**4 r6 = r**6 x1_d = x1 * (1 + k1*r2 + k2*r4 + k3*r6) / (1 + k4*r2 + k5*r4 + k6*r6) + \ 2*p1*x1*y1 + p2*(r2 + 2*x1**2) + \ s1*r2 + s2*r4 y1_d = y1 * (1 + a1 + k1*r2 + k2*r4 + k3*r6) / (1 + a2 + k4*r2 + k5*r4 + k6*r6) + \ p1*(r2 + 2*y1**2) + 2*p2*x1*y1 + s3*r2 + s4*r4 x1_d = x1_d * centre[0] + centre[0] y1_d = y1_d * centre[1] + centre[1] pts_d = np.stack([x1_d, y1_d], axis = 0).T return pts_d def _fisheye_project(obj_points, params): """ Equidistant fisheye projection: 3D world coordinates -> 2D image coordinates. Uses the front angle convention where theta = -arctan2(r_cam, -Z) and image radius r = f * theta_d with f = w / fov_rad (equidistant focal length). Parameters ---------- obj_points : pandas.DataFrame Coordinates of the points in 3D geographic coordinate system. The column names must be x, y, z. params : dict Camera parameters. Required keys: - x, y, z: Camera position - fov, pan, tilt, roll: Orientation - w, h: Image dimensions - cx, cy: Principal point - k1, k2, k3, k4: Angle-space distortion coefficients (default 0) - a1, a2: Aspect ratio correction (default 1.0). Scales horizontal and vertical focal lengths independently. - p1, p2: Tangential (decentering) distortion in image space (default 0) Returns ------- uv : pandas.DataFrame 2D projected coordinates with columns u, v. """ if params["fov"] <= 0: raise ValueError(f"fov must be positive, got {params['fov']}") obj = obj_points[["x", "y", "z"]] op = np.vstack((obj.to_numpy().T, np.ones([1, len(obj)]))) emat = extrinsic_mat( params["pan"], params["tilt"], params["roll"], params["x"], params["y"], params["z"]) cam = np.dot(emat, op)[:3, :] X, Y, Z = cam[0], cam[1], cam[2] r_cam = np.sqrt(X**2 + Y**2) theta = -np.arctan2(r_cam, -Z) k1 = params.get("k1", 0.0) k2 = params.get("k2", 0.0) k3 = params.get("k3", 0.0) k4 = params.get("k4", 0.0) t2 = theta**2 theta_d = theta * (1.0 + k1 * t2 + k2 * t2**2 + k3 * t2**3 + k4 * t2**4) fov_rad = params["fov"] * pi / 180.0 f = params["w"] / fov_rad # Aspect ratio correction a1 = params.get("a1", 1.0) a2 = params.get("a2", 1.0) if abs(a1) < 1e-10 or abs(a2) < 1e-10: raise ValueError(f"a1 and a2 must be non-zero, got a1={a1}, a2={a2}") safe_r = np.where(r_cam > 1e-10, r_cam, 1.0) r_img = f * theta_d u = params["w"] - (r_img * (X / safe_r) * a1 + params["cx"]) v = r_img * (Y / safe_r) * a2 + params["cy"] on_axis = r_cam < 1e-10 u = np.where(on_axis, params["w"] - params["cx"], u) v = np.where(on_axis, params["cy"], v) # Tangential (decentering) distortion in image space p1 = params.get("p1", 0.0) p2 = params.get("p2", 0.0) if p1 != 0.0 or p2 != 0.0: w_half = params["w"] / 2 h_half = params["h"] / 2 x_n = (u - (params["w"] - params["cx"])) / w_half y_n = (v - params["cy"]) / h_half r2 = x_n**2 + y_n**2 du = 2 * p1 * x_n * y_n + p2 * (r2 + 2 * x_n**2) dv = p1 * (r2 + 2 * y_n**2) + 2 * p2 * x_n * y_n u = u + du * w_half v = v + dv * h_half return pd.DataFrame({"u": u, "v": v}) def _pinhole_project(obj_points, params): """ Pinhole projection: 3D world coordinates -> 2D image coordinates. Uses intrinsic/extrinsic matrices and the pinhole distortion model. Parameters ---------- obj_points : pandas.DataFrame Coordinates of the points in 3D geographic coordinate system. The column names must be x, y, z. params : dict Camera parameters. For detailed parameter descriptions, see alproj.project.persp_proj(). Returns ------- uv : pandas.DataFrame 2D projected coordinates with columns u, v. """ obj_points = obj_points[["x", "y", "z"]] op = np.vstack((obj_points.to_numpy().T, np.ones([1,len(obj_points)]))) imat = intrinsic_mat(params["fov"], params["w"], params["h"], params["cx"], params["cy"]) emat = extrinsic_mat(params["pan"], params["tilt"], params["roll"], params["x"], params["y"], params["z"]) op_cc = np.dot(emat, op) # Object points in camera coordinate system op_ic = np.dot(imat, op_cc[:3,:]) # Object points in image coordinate system uv = np.array([ params["w"] - op_ic[0,:]/op_ic[2,:], op_ic[1,:]/op_ic[2,:] ]).T uv_distort = _distort( uv, params["w"], params["h"], params["a1"], params["a2"], params["k1"], params["k2"], params["k3"], params["k4"], params["k5"], params["k6"], params["p1"], params["p2"], params["s1"], params["s2"], params["s3"], params["s4"], cx=params["cx"], cy=params["cy"]) uv_df = pd.DataFrame(uv_distort, columns = ["u", "v"]) return uv_df
[docs] def project(obj_points, params): """ 3D to 2D Perspective projection of given points with given camera parameters. Parameters ---------- obj_points : pandas.DataFrame Coordinates of the points (usually GCPs) in 3D geographic coordinate system. The column names must be x, y, z. params : dict Camera parameters. For detailed parameter descriptions, see alproj.project.persp_proj(). If params contains ``"model": "fisheye"``, uses equidistant fisheye projection. Otherwise uses the default pinhole projection. Returns ------- uv : pandas.DataFrame 2D projected coordinates with columns u, v. """ if params.get("model") == "fisheye": return _fisheye_project(obj_points, params) return _pinhole_project(obj_points, params)
[docs] def mean_reprojection_error(img_points, projected, weights=None): """ Calculate mean reprojection error (mean Euclidean distance) of the projection. Parameters ---------- img_points : pandas.DataFrame Observed image coordinates with columns u, v. projected : pandas.DataFrame Projected image coordinates with columns u, v. weights : numpy.ndarray, default None Per-point weights. If provided, returns weighted average. Returns ------- error : float Mean reprojection error in pixels. """ img_arr = img_points[["u", "v"]].to_numpy() proj_arr = projected[["u", "v"]].to_numpy() dist = np.sqrt((img_arr[:, 0] - proj_arr[:, 0])**2 + (img_arr[:, 1] - proj_arr[:, 1])**2) if weights is not None: weights = np.asarray(weights, dtype=float) if len(weights) != len(dist): raise ValueError( f"weights length ({len(weights)}) != number of points ({len(dist)})") if np.any(weights < 0): raise ValueError("weights must be non-negative") return np.average(dist, weights=weights) return np.mean(dist)
[docs] def rmse(img_points, projected, weights=None): """ Deprecated alias for :func:`mean_reprojection_error`. .. deprecated:: 1.2.0 Use :func:`mean_reprojection_error` instead. This function computes mean Euclidean distance, not root-mean-square error. """ warnings.warn( "rmse() is deprecated, use mean_reprojection_error() instead. " "This function computes mean Euclidean distance, not RMSE.", DeprecationWarning, stacklevel=2) return mean_reprojection_error(img_points, projected, weights)
[docs] def huber_loss(img_points, projected, f_scale=10.0, weights=None): """ Calculate Huber loss for robust optimization. Huber loss is less sensitive to outliers than squared error loss. For residuals below f_scale, it behaves like L2 (squared) loss. For residuals above f_scale, it behaves like L1 (linear) loss. Parameters ---------- img_points : pandas.DataFrame Observed image coordinates with columns u, v. projected : pandas.DataFrame Projected image coordinates with columns u, v. f_scale : float default 10.0 Threshold in pixels. Residuals below f_scale use L2, above use L1. weights : numpy.ndarray, default None Per-point weights. If provided, returns weighted average. Returns ------- loss : float Mean Huber loss value. """ img_arr = img_points[["u", "v"]].to_numpy() proj_arr = projected[["u", "v"]].to_numpy() residuals = np.sqrt((img_arr[:, 0] - proj_arr[:, 0])**2 + (img_arr[:, 1] - proj_arr[:, 1])**2) loss = np.where( residuals <= f_scale, 0.5 * residuals**2, f_scale * (residuals - 0.5 * f_scale) ) if weights is not None: weights = np.asarray(weights, dtype=float) if len(weights) != len(loss): raise ValueError( f"weights length ({len(weights)}) != number of points ({len(loss)})") if np.any(weights < 0): raise ValueError("weights must be non-negative") return np.average(loss, weights=weights) return np.mean(loss)
[docs] def compute_residuals(obj_points, img_points, params): """ Compute residual vector for least_squares optimization. Parameters ---------- obj_points : pandas.DataFrame Geographic coordinates of the Ground Control Points. img_points : pandas.DataFrame Image coordinates of the Ground Control Points. params : dict Camera parameters. Returns ------- residuals : numpy.ndarray Flattened residual vector (observed - projected). """ projected = project(obj_points, params) img_arr = img_points[["u", "v"]].to_numpy() proj_arr = projected.to_numpy() residuals = (img_arr - proj_arr).flatten() return residuals
DEFAULT_BOUND_WIDTHS = { "fov": 45, "pan": 45, "tilt": 45, "roll": 45, "x": 30, "y": 30, "z": 30, "cx": 50, "cy": 50, "a1": 0.2, "a2": 0.2, "k1": 0.2, "k2": 0.2, "k3": 0.2, "k4": 0.2, "k5": 0.2, "k6": 0.2, "p1": 0.2, "p2": 0.2, "s1": 0.2, "s2": 0.2, "s3": 0.2, "s4": 0.2, }
[docs] def bounds_to_array(params_init, target_params, bound_widths=None): """ Convert bound widths (dict) to numpy array for CMA-ES optimizer. Parameters ---------- params_init : dict Initial values of camera parameters. target_params : list Parameters to be optimized. bound_widths : dict default None Width from initial value for each parameter (e.g., {"fov": 45, "x": 30}). If None or parameter not specified, uses DEFAULT_BOUND_WIDTHS. Returns ------- bounds : numpy.ndarray Bounds array with shape (len(target_params), 2). """ if bound_widths is None: bound_widths = {} bounds = np.zeros((len(target_params), 2)) for i, key in enumerate(target_params): value = params_init[key] width = bound_widths.get(key, DEFAULT_BOUND_WIDTHS.get(key, 0.2)) bounds[i, :] = np.array([value - width, value + width]) return bounds
[docs] class BaseOptimizer: """ Base class for camera parameter optimizers. Attributes ---------- obj_points : pandas.DataFrame Geographic coordinates of the Ground Control Points. The column names must be x, y, z. See alproj.gcp.set_gcp() img_points : pandas.DataFrame Image coordinates of the Ground Control Points. The column names must be u, v. See alproj.gcp.set_gcp() params_init : dict Initial values of camera parameters. Must contain: - x, y, z: Camera position in projected CRS (e.g., UTM in meters) - fov: Field of view in degrees - pan, tilt, roll: Orientation angles in degrees - a1, a2, k1-k6, p1, p2, s1-s4: Distortion coefficients - w, h: Image width and height in pixels - cx, cy: Principal point coordinates """ def __init__(self, obj_points, img_points, params_init): self.obj_points = obj_points self.img_points = img_points self.params_init = params_init
[docs] def set_target(self, target_params=["fov", "pan", "tilt", "roll", "a1", "a2", "k1", "k2", "k3", "k4", "k5", "k6", "p1", "p2", "s1", "s2", "s3", "s4"]): """ Set which parameters to be optimized. Parameters ---------- target_params : list default ["fov", "pan", "tilt", "roll", "a1", "a2", "k1", "k2", "k3", "k4", "k5", "k6", "p1", "p2", "s1", "s2", "s3", "s4"] Parameters to be optimized. You can also include x, y, and z for camera location. """ p = self.params_init t = target_params self.target_params = target_params self.target_params_init = np.array([p[ti] for ti in t])
[docs] class CMAOptimizer(BaseOptimizer): """ Camera parameter optimizer using Covariance Matrix Adaptation Evolution Strategy (CMA-ES). See https://pypi.org/project/cmaes/ . You can select which parameters to be optimized, including camera location (x, y, z). """ def _loss_function(self, bounds, f_scale=None, weights=None): """ Create loss function with normalization. Parameters ---------- bounds : numpy.ndarray Bounds array with shape (len(target_params), 2). f_scale : float default None Threshold for Huber loss in pixels. If None, uses mean error. If specified, uses Huber loss with this threshold. weights : numpy.ndarray, default None Per-GCP weights for weighted loss computation. """ params = self.params_init.copy() for t in self.target_params: params.pop(t) lower = bounds[:, 0] upper = bounds[:, 1] def _proj_error(normalized_values): # Denormalize: [0, 1] -> [lower, upper] values = normalized_values * (upper - lower) + lower params.update(dict(zip(self.target_params, values))) projected = project(self.obj_points, params) if f_scale is None: loss = mean_reprojection_error( self.img_points, projected, weights) else: loss = huber_loss( self.img_points, projected, f_scale, weights) return loss return _proj_error
[docs] def optimize(self, sigma=0.2, bound_widths=None, generation=1000, population_size=10, n_max_resampling=100, f_scale=None, weights=None): """ CMA-optimization of camera parameters. See https://github.com/CyberAgent/cmaes/blob/main/cmaes/_cma.py . Parameters are normalized to [0, 1] range internally for efficient optimization. Parameters ---------- sigma : float default 0.2 Initial standard deviation of covariance matrix (in normalized [0, 1] space). bound_widths : dict default None Width from initial value for each parameter (e.g., {"fov": 30, "x": 50}). If None or parameter not specified, uses default widths: +-45 degrees for fov, pan, tilt, roll; +-30 meters for x, y, z; +-0.2 for distortion coefficients. Note that large absolute values of distortion coefficients may cause broken projection. generation : int default 1000 Number of generations to run. population_size : int default 10 Population size. n_max_resampling : int default 100 A maximum number of resampling parameters (default: 100). If all sampled parameters are infeasible, the last sampled one will be clipped with lower and upper bounds. f_scale : float default None Threshold for Huber loss in pixels. If None, uses mean reprojection error. If specified (e.g., 10.0), uses Huber loss which is more robust to outliers. weights : numpy.ndarray, default None Per-GCP weights for the loss function. Higher weights increase the importance of the corresponding GCPs. Useful for balancing center and edge region accuracy. Returns ------- params : dict Optimized camera parameters. error : float Mean reprojection error in pixels (unweighted). """ # Compute bounds in original space bounds = bounds_to_array(self.params_init, self.target_params, bound_widths) lower = bounds[:, 0] upper = bounds[:, 1] # Normalize initial values to [0, 1] # Initial value is at center of bounds, so normalized = 0.5 normalized_init = (self.target_params_init - lower) / (upper - lower) # CMA-ES works in normalized [0, 1] space normalized_bounds = np.column_stack([np.zeros(len(self.target_params)), np.ones(len(self.target_params))]) loss_function = self._loss_function(bounds, f_scale, weights) optimizer = CMA( mean=normalized_init.astype("float64"), sigma=float(sigma), bounds=normalized_bounds, population_size=population_size, n_max_resampling=n_max_resampling ) best_loss = float("inf") best_normalized = normalized_init.copy() for _ in tqdm(range(generation)): solutions = [] for _ in range(population_size): x = optimizer.ask() value = loss_function(x) solutions.append((x, value)) if value < best_loss: best_loss = value best_normalized = x.copy() optimizer.tell(solutions) # Denormalize best solution found across all generations best_values = best_normalized * (upper - lower) + lower params = self.params_init.copy() for t in self.target_params: params.pop(t) params.update(dict(zip(self.target_params, best_values))) # Always return unweighted mean reprojection error for consistency projected = project(self.obj_points, params) error = mean_reprojection_error(self.img_points, projected) return params, error
[docs] class LsqOptimizer(BaseOptimizer): """ Camera parameter optimizer using scipy.optimize.least_squares. Supports Trust Region Reflective (trf), dogbox, and Levenberg-Marquardt (lm) methods. """ def _residual_function(self, weights=None): """ Create residual function for least_squares. Parameters ---------- weights : numpy.ndarray, default None Per-GCP weights. Applied as sqrt(weight) to residuals. Returns ------- residual_func : callable Function that computes residuals given parameter values. """ params = self.params_init.copy() for t in self.target_params: params.pop(t) if weights is not None: w = np.repeat(np.sqrt(weights), 2) else: w = None def _residuals(values): params.update(dict(zip(self.target_params, values))) r = compute_residuals(self.obj_points, self.img_points, params) if w is not None: r = r * w return r return _residuals
[docs] def optimize(self, method="trf", bound_widths=None, loss="linear", f_scale=1.0, weights=None, **kwargs): """ Least-squares optimization of camera parameters. Parameters ---------- method : str default "trf" Algorithm to use: "trf" (Trust Region Reflective), "dogbox", or "lm" (Levenberg-Marquardt). Note: "lm" does not support bounds or robust loss functions. bound_widths : dict default None Width from initial value for each parameter (e.g., {"fov": 30, "x": 50}). If None, uses default widths. Ignored when method="lm". loss : str default "linear" Loss function to use. Options: - "linear": Standard least squares (L2 loss). - "huber": Huber loss, robust to outliers. - "soft_l1": Smooth approximation of L1 loss. - "cauchy": Cauchy loss, strongly robust to outliers. - "arctan": Arctan loss. Note: Only "linear" is supported when method="lm". f_scale : float default 1.0 Soft threshold for inlier residuals (in pixels). Residuals below f_scale are treated normally, while those above are down-weighted according to the loss function. Has no effect when loss="linear". Typical values: 1.0-20.0 pixels depending on expected outlier magnitude. weights : numpy.ndarray, default None Per-GCP weights for the loss function. Applied as sqrt(weight) scaling to the residual vector. Useful for region-balanced optimization. **kwargs : Additional arguments passed to scipy.optimize.least_squares. Returns ------- params : dict Optimized camera parameters. error : float Mean reprojection error in pixels (unweighted). """ if method == "lm" and bound_widths is not None: raise ValueError("method='lm' does not support bounds. Set bound_widths=None or use 'trf'/'dogbox'.") if method == "lm" and loss != "linear": raise ValueError("method='lm' does not support robust loss functions. Use loss='linear' or method='trf'/'dogbox'.") residual_func = self._residual_function(weights) if method == "lm": result = least_squares( residual_func, self.target_params_init, method=method, **kwargs ) else: bounds = bounds_to_array(self.params_init, self.target_params, bound_widths) lower = bounds[:, 0] upper = bounds[:, 1] result = least_squares( residual_func, self.target_params_init, method=method, bounds=(lower, upper), loss=loss, f_scale=f_scale, **kwargs ) best_values = result.x params = self.params_init.copy() for t in self.target_params: params.pop(t) params.update(dict(zip(self.target_params, best_values))) projected = project(self.obj_points, params) error = mean_reprojection_error(self.img_points, projected) return params, error