tnc/contractionpath/repartitioning/simulated_annealing.rs

1//! Repartitioning using simulated annealing algorithms.
2
3use std::{
4    iter::zip,
5    time::{Duration, Instant},
6};
7
8use itertools::Itertools;
9use ordered_float::NotNan;
10use rand::{rngs::StdRng, seq::IteratorRandom, Rng, SeedableRng};
11use rayon::iter::{IndexedParallelIterator, IntoParallelRefMutIterator, ParallelIterator};
12use rustc_hash::FxHashSet;
13
14use crate::{
15    contractionpath::{
16        communication_schemes::CommunicationScheme,
17        contraction_cost::{compute_memory_requirements, contract_size_tensors_exact},
18        paths::{
19            cotengrust::{Cotengrust, OptMethod},
20            FindPath,
21        },
22        repartitioning::compute_solution,
23        SimplePath,
24    },
25    tensornetwork::{partitioning::partition_tensor_network, tensor::Tensor},
26};
27
/// Score type used by the optimizer. `NotNan` gives scores a total order so
/// candidate solutions can be compared and minimized safely.
type ScoreType = NotNan<f64>;

/// Number of threads to use for processing candidate solutions in parallel. This is
/// a constant (and not hardware-aware) for reproducibility.
const PROCESSING_THREADS: usize = 48;
33
/// OptModel is a trait that defines requirements to be used with optimization algorithm
pub trait OptModel: Sync + Send {
    /// Type of the Solution
    type SolutionType: Clone + Sync + Send;

    /// Generate a new trial solution from current solution.
    ///
    /// Takes `current_solution` by value so implementations can mutate it in
    /// place without cloning; `rng` is the only source of randomness, keeping
    /// runs reproducible for a fixed seed.
    fn generate_trial_solution<R: Rng>(
        &self,
        current_solution: Self::SolutionType,
        rng: &mut R,
    ) -> Self::SolutionType;

    /// Evaluate the score of the solution (lower is better).
    fn evaluate<R: Rng>(&self, solution: &Self::SolutionType, rng: &mut R) -> ScoreType;
}
49
/// Optimizer that implements the simulated annealing algorithm.
///
/// The temperature schedule is interpolated between `initial_temperature` and
/// `final_temperature` in log-space over the wall-clock time budget.
#[derive(Clone, Copy)]
pub struct SimulatedAnnealingOptimizer {
    /// Number of candidate solutions to generate and evaluate in each iteration.
    n_trials: usize,
    /// Total duration to take for the optimization
    max_time: Duration,
    /// Number of steps to take in each temperature iteration.
    n_steps: usize,
    /// Number of iterations without improvement after which the algorithm should
    /// restart from the best solution found so far.
    restart_iter: usize,
    /// The initial temperature to start the annealing process with.
    initial_temperature: f64,
    /// The final temperature to reach at the end of the annealing process.
    final_temperature: f64,
}
67
/// Linearly interpolates between two numbers based on parameter `t`.
///
/// Computes `start + (end - start) * t`, using a fused multiply-add so the
/// result is rounded only once.
#[inline]
fn linear_interpolation(start: f64, end: f64, t: f64) -> f64 {
    let span = end - start;
    span.mul_add(t, start)
}
75
76impl SimulatedAnnealingOptimizer {
77    /// Start optimization with given temperature range
78    ///
79    /// - `model` : the model to optimize
80    /// - `initial_solution` : the initial solution to start optimization.
81    /// - `n_iter`: maximum iterations
82    #[allow(clippy::too_many_arguments)]
83    fn optimize_with_temperature<M, R>(
84        &self,
85        model: &M,
86        initial_solution: M::SolutionType,
87        rng: &mut R,
88    ) -> (M::SolutionType, ScoreType)
89    where
90        M: OptModel,
91        R: Rng,
92    {
93        let mut current_score = model.evaluate(&initial_solution, rng);
94        let mut current_solution = initial_solution;
95        let mut best_solution = current_solution.clone();
96        let mut best_score = current_score;
97        let mut last_improvement = 0;
98        let steps_per_thread = self.n_steps.div_ceil(self.n_trials);
99
100        let log_start = self.initial_temperature.log2();
101        let log_end = self.final_temperature.log2();
102        let total_seconds = self.max_time.as_secs_f64();
103        let mut temperature = self.initial_temperature;
104        let mut rngs = (0..self.n_trials)
105            .map(|_| StdRng::seed_from_u64(rng.random()))
106            .collect_vec();
107        let end_time = Instant::now() + self.max_time;
108        loop {
109            // Generate and evaluate candidate solutions to find the minimum objective
110            let (_, trial_solution, trial_score) = rngs
111                .par_iter_mut()
112                .enumerate()
113                .map(|(index, rng)| {
114                    let mut trial_score = current_score;
115                    let mut trial_solution = current_solution.clone();
116                    for _ in 0..steps_per_thread {
117                        let solution = model.generate_trial_solution(trial_solution.clone(), rng);
118                        let score = model.evaluate(&solution, rng);
119
120                        let diff = (score / trial_score).log2();
121                        let acceptance_probability = (-diff / temperature).exp();
122                        let random_value = rng.random();
123
124                        if acceptance_probability >= random_value {
125                            trial_solution = solution;
126                            trial_score = score;
127                        }
128                    }
129                    (index, trial_solution, trial_score)
130                })
131                .min_by_key(|(index, _, score)| (*score, *index))
132                .unwrap();
133
134            current_score = trial_score;
135            current_solution = trial_solution;
136
137            // Update the best solution if the current solution is better
138            if current_score < best_score {
139                best_solution = current_solution.clone();
140                best_score = current_score;
141                last_improvement = 0;
142            }
143
144            last_improvement += 1;
145
146            // Check if we should restart from the best solution
147            if last_improvement == self.restart_iter {
148                current_solution = best_solution.clone();
149                current_score = best_score;
150            }
151
152            // Estimate the number of remaining iterations and adapt the temperature
153            let now = Instant::now();
154            if now > end_time {
155                // We've reached the time limit
156                break;
157            }
158            let remaining_time = (end_time - now).as_secs_f64();
159            let progress = 1.0 - remaining_time / total_seconds;
160            temperature = linear_interpolation(log_start, log_end, progress).exp2();
161        }
162
163        (best_solution, best_score)
164    }
165}
166
167/// Common evaluation method for all simulated annealing methods.
168fn evaluate_partitioning<R>(
169    tensor: &Tensor,
170    partitioning: &[usize],
171    communication_scheme: CommunicationScheme,
172    memory_limit: Option<f64>,
173    rng: &mut R,
174) -> NotNan<f64>
175where
176    R: Rng,
177{
178    // Construct the tensor network and contraction path from the partitioning
179    let (partitioned_tn, path, parallel_cost, _) =
180        compute_solution(tensor, partitioning, communication_scheme, Some(rng));
181
182    // If the memory limit is exceeded, return infinity
183    if let Some(limit) = memory_limit {
184        // Compute memory usage
185        let mem = compute_memory_requirements(
186            partitioned_tn.tensors(),
187            &path,
188            contract_size_tensors_exact,
189        );
190
191        if mem > limit {
192            return NotNan::new(f64::INFINITY).unwrap();
193        }
194    }
195    NotNan::new(parallel_cost).unwrap()
196}
197
/// A simulated annealing model that moves a random tensor between random partitions.
pub struct NaivePartitioningModel<'a> {
    /// The tensor network being repartitioned.
    pub tensor: &'a Tensor,
    /// Total number of partitions a tensor can be assigned to.
    pub num_partitions: usize,
    /// Communication scheme used when scoring a partitioning.
    pub communication_scheme: CommunicationScheme,
    /// Optional memory limit; solutions exceeding it score as infinity.
    pub memory_limit: Option<f64>,
}
205
206impl OptModel for NaivePartitioningModel<'_> {
207    type SolutionType = Vec<usize>;
208
209    fn generate_trial_solution<R: Rng>(
210        &self,
211        mut current_solution: Self::SolutionType,
212        rng: &mut R,
213    ) -> Self::SolutionType {
214        let tensor_index = rng.random_range(0..current_solution.len());
215        let current_partition = current_solution[tensor_index];
216        let new_partition = loop {
217            let b = rng.random_range(0..self.num_partitions);
218            if b != current_partition {
219                break b;
220            }
221        };
222        current_solution[tensor_index] = new_partition;
223        current_solution
224    }
225
226    fn evaluate<R: Rng>(&self, solution: &Self::SolutionType, rng: &mut R) -> ScoreType {
227        evaluate_partitioning(
228            self.tensor,
229            solution,
230            self.communication_scheme,
231            self.memory_limit,
232            rng,
233        )
234    }
235}
236
/// A simulated annealing model that moves a random subtree between random partitions.
pub struct NaiveIntermediatePartitioningModel<'a> {
    /// The tensor network being repartitioned.
    pub tensor: &'a Tensor,
    /// Total number of partitions a tensor can be assigned to.
    pub num_partitions: usize,
    /// Communication scheme used when scoring a partitioning.
    pub communication_scheme: CommunicationScheme,
    /// Optional memory limit; solutions exceeding it score as infinity.
    pub memory_limit: Option<f64>,
}
244
impl OptModel for NaiveIntermediatePartitioningModel<'_> {
    /// A partition index per leaf tensor, plus one contraction path per
    /// partition (kept in sync with the assignment).
    type SolutionType = (Vec<usize>, Vec<SimplePath>);

    /// Moves a random contraction subtree from a source partition to a random
    /// target partition, then recomputes greedy contraction paths for the two
    /// affected partitions.
    fn generate_trial_solution<R: Rng>(
        &self,
        current_solution: Self::SolutionType,
        rng: &mut R,
    ) -> Self::SolutionType {
        let (mut partitioning, mut contraction_paths) = current_solution;

        // Select source partition (with more than one tensor)
        // NOTE(review): the filter requires at least three contraction pairs;
        // presumably this keeps the source partition non-trivial after the
        // subtree is removed — confirm against `SimplePath` conventions.
        let source_partition = contraction_paths
            .iter()
            .enumerate()
            .filter_map(|(contraction_id, contraction)| {
                if contraction.len() >= 3 {
                    Some(contraction_id)
                } else {
                    None
                }
            })
            .choose(rng);

        let Some(source_partition) = source_partition else {
            // No viable partitions, return the current solution
            return (partitioning, contraction_paths);
        };

        // Select random tensor contraction in source partition. The last pair
        // is excluded — presumably so the moved subtree is a strict subset of
        // the partition; TODO confirm.
        let pair_index = rng.random_range(0..contraction_paths[source_partition].len() - 1);
        let (i, j) = contraction_paths[source_partition][pair_index];
        let mut tensor_leaves = FxHashSet::from_iter([i, j]);

        // Gather all tensors that contribute to the selected contraction by
        // walking the path backwards from the chosen pair.
        // NOTE(review): only the first operand `i` is looked up; this assumes
        // the replace-path convention where a pair's result is stored back at
        // index `i` — TODO confirm.
        for (i, j) in contraction_paths[source_partition]
            .iter()
            .take(pair_index)
            .rev()
        {
            if tensor_leaves.contains(i) {
                tensor_leaves.insert(*j);
            }
        }

        // Map partition-local indices in `tensor_leaves` back to the global
        // tensor indices used by `partitioning`.
        let mut shifted_indices = Vec::with_capacity(tensor_leaves.len());
        for (partition_tensor_index, (i, _partition)) in partitioning
            .iter()
            .enumerate()
            .filter(|(_, partition)| *partition == &source_partition)
            .enumerate()
        {
            if tensor_leaves.contains(&partition_tensor_index) {
                shifted_indices.push(i);
            }
        }

        // Select random target partition.
        // NOTE(review): this rejection loop never terminates when
        // `num_partitions < 2`; callers appear to guarantee at least two
        // partitions.
        let target_partition = loop {
            let b = rng.random_range(0..self.num_partitions);
            if b != source_partition {
                break b;
            }
        };

        // Change partition
        for index in shifted_indices {
            partitioning[index] = target_partition;
        }

        // Recompute the contraction path for both partitions: rebuild the two
        // partition tensors from the updated assignment ...
        let mut from_tensor = Tensor::default();
        let mut to_tensor = Tensor::default();
        for (partition_index, tensor) in zip(&partitioning, self.tensor.tensors()) {
            if *partition_index == source_partition {
                from_tensor.push_tensor(tensor.clone());
            } else if *partition_index == target_partition {
                to_tensor.push_tensor(tensor.clone());
            }
        }

        // ... then run the greedy path finder on each.
        let mut from_opt = Cotengrust::new(&from_tensor, OptMethod::Greedy);
        from_opt.find_path();
        let from_path = from_opt.get_best_replace_path();
        contraction_paths[source_partition] = from_path.into_simple();

        let mut to_opt = Cotengrust::new(&to_tensor, OptMethod::Greedy);
        to_opt.find_path();
        let to_path = to_opt.get_best_replace_path();
        contraction_paths[target_partition] = to_path.into_simple();

        (partitioning, contraction_paths)
    }

    /// Scores via the shared partitioning evaluation (parallel contraction
    /// cost, or infinity when the memory limit is exceeded).
    fn evaluate<R: Rng>(&self, solution: &Self::SolutionType, rng: &mut R) -> ScoreType {
        evaluate_partitioning(
            self.tensor,
            &solution.0,
            self.communication_scheme,
            self.memory_limit,
            rng,
        )
    }
}
348
/// A simulated annealing model that moves a random tensor to the partition that
/// maximizes memory reduction.
pub struct LeafPartitioningModel<'a> {
    /// The tensor network being repartitioned.
    pub tensor: &'a Tensor,
    /// Communication scheme used when scoring a partitioning.
    pub communication_scheme: CommunicationScheme,
    /// Optional memory limit; solutions exceeding it score as infinity.
    pub memory_limit: Option<f64>,
}
356
impl OptModel for LeafPartitioningModel<'_> {
    /// A partition index per leaf tensor, plus the combined tensor of each
    /// partition (cached so move costs can be evaluated cheaply).
    type SolutionType = (Vec<usize>, Vec<Tensor>);

    /// Moves a single random tensor to the partition whose size grows the least
    /// (equivalently, shrinks the most), keeping the cached partition tensors
    /// in sync.
    fn generate_trial_solution<R: Rng>(
        &self,
        current_solution: Self::SolutionType,
        rng: &mut R,
    ) -> Self::SolutionType {
        let (mut partitioning, mut partition_tensors) = current_solution;
        let tensor_index = rng.random_range(0..partitioning.len());
        let shifted_tensor = self.tensor.tensor(tensor_index);
        let source_partition = partitioning[tensor_index];

        // Pick the target minimizing the size delta
        // `|partition ^ tensor| - |partition|` caused by the move.
        // NOTE(review): `unwrap` panics when there is only one partition (the
        // filter below then yields nothing); callers appear to guarantee at
        // least two partitions.
        let (new_partition, _) = partition_tensors
            .iter()
            .enumerate()
            .filter_map(|(i, partition_tensor)| {
                if i != source_partition {
                    Some((
                        i,
                        (shifted_tensor ^ partition_tensor).size() - partition_tensor.size(),
                    ))
                } else {
                    // Don't consider old partition as move target (would be a NOOP)
                    None
                }
            })
            .min_by(|a, b| a.1.total_cmp(&b.1))
            .unwrap();

        partitioning[tensor_index] = new_partition;
        // `^=` updates the cached partition tensors: the moved tensor is
        // removed from the source and added to the target (symmetric-
        // difference-like semantics of `Tensor`'s XOR — inferred from usage).
        partition_tensors[source_partition] ^= shifted_tensor;
        partition_tensors[new_partition] ^= shifted_tensor;
        (partitioning, partition_tensors)
    }

    /// Scores via the shared partitioning evaluation (parallel contraction
    /// cost, or infinity when the memory limit is exceeded).
    fn evaluate<R: Rng>(&self, solution: &Self::SolutionType, rng: &mut R) -> ScoreType {
        evaluate_partitioning(
            self.tensor,
            &solution.0,
            self.communication_scheme,
            self.memory_limit,
            rng,
        )
    }
}
403
/// A simulated annealing model that moves a random subtree to the partition that
/// maximizes memory reduction.
pub struct IntermediatePartitioningModel<'a> {
    /// The tensor network being repartitioned.
    pub tensor: &'a Tensor,
    /// Communication scheme used when scoring a partitioning.
    pub communication_scheme: CommunicationScheme,
    /// Optional memory limit; solutions exceeding it score as infinity.
    pub memory_limit: Option<f64>,
}
411
412impl IntermediatePartitioningModel<'_> {
413    pub fn compute_initial_solution(
414        &self,
415        initial_partitioning: &[usize],
416        initial_contraction_paths: Option<Vec<SimplePath>>,
417    ) -> <IntermediatePartitioningModel<'_> as OptModel>::SolutionType {
418        let partitioned_tn = partition_tensor_network(self.tensor.clone(), initial_partitioning);
419
420        // Get the partition tensors
421        let partition_tensors = partitioned_tn
422            .tensors()
423            .iter()
424            .map(Tensor::external_tensor)
425            .collect_vec();
426
427        let contraction_paths = initial_contraction_paths.unwrap_or_else(|| {
428            // Find contraction paths using Greedy
429            partitioned_tn
430                .tensors()
431                .iter()
432                .map(|t| {
433                    // Find path for this partition
434                    let mut opt = Cotengrust::new(t, OptMethod::Greedy);
435                    opt.find_path();
436                    let path = opt.get_best_replace_path();
437                    path.into_simple()
438                })
439                .collect()
440        });
441
442        // Find a contraction path for the partitioned tensor network
443        (
444            initial_partitioning.to_vec(),
445            partition_tensors,
446            contraction_paths,
447        )
448    }
449}
450
impl OptModel for IntermediatePartitioningModel<'_> {
    /// A partition index per leaf tensor, the combined tensor of each
    /// partition, and one contraction path per partition.
    type SolutionType = (Vec<usize>, Vec<Tensor>, Vec<SimplePath>);

    /// Moves a random contraction subtree to the partition with the smallest
    /// resulting size increase, then updates the cached partition tensors and
    /// recomputes the contraction paths of both affected partitions.
    fn generate_trial_solution<R: Rng>(
        &self,
        current_solution: Self::SolutionType,
        rng: &mut R,
    ) -> Self::SolutionType {
        let (mut partitioning, mut partition_tensors, mut contraction_paths) = current_solution;

        // Select source partition (with more than one tensor)
        // NOTE(review): the filter requires at least three contraction pairs;
        // presumably this keeps the source partition non-trivial after the
        // subtree is removed — confirm against `SimplePath` conventions.
        let source_partition = contraction_paths
            .iter()
            .enumerate()
            .filter_map(|(contraction_id, contraction)| {
                if contraction.len() >= 3 {
                    Some(contraction_id)
                } else {
                    None
                }
            })
            .choose(rng);

        let Some(source_partition) = source_partition else {
            // No viable partitions, return the current solution
            return (partitioning, partition_tensors, contraction_paths);
        };

        // Select random tensor contraction in source partition. The last pair
        // is excluded — presumably so the moved subtree is a strict subset of
        // the partition; TODO confirm.
        let pair_index = rng.random_range(0..contraction_paths[source_partition].len() - 1);
        let (i, j) = contraction_paths[source_partition][pair_index];
        let mut tensor_leaves = FxHashSet::from_iter([i, j]);

        // Gather all tensors that contribute to the selected contraction by
        // walking the path backwards from the chosen pair.
        // NOTE(review): only the first operand `i` is looked up; this assumes
        // the replace-path convention where a pair's result is stored back at
        // index `i` — TODO confirm.
        for (i, j) in contraction_paths[source_partition]
            .iter()
            .take(pair_index)
            .rev()
        {
            if tensor_leaves.contains(i) {
                tensor_leaves.insert(*j);
            }
        }

        // Map partition-local indices in `tensor_leaves` back to global tensor
        // indices, accumulating the combined tensor being moved along the way.
        let mut shifted_tensor = Tensor::default();
        let mut shifted_indices = Vec::with_capacity(tensor_leaves.len());
        for (partition_tensor_index, (i, _partition)) in partitioning
            .iter()
            .enumerate()
            .filter(|(_, partition)| *partition == &source_partition)
            .enumerate()
        {
            if tensor_leaves.contains(&partition_tensor_index) {
                shifted_tensor ^= self.tensor.tensor(i);
                shifted_indices.push(i);
            }
        }

        // Find best target partition: the one minimizing the size delta
        // `|partition ^ subtree| - |partition|`.
        // Cost function is actually quite important!!
        // NOTE(review): `unwrap` panics when there is only one partition (the
        // filter below then yields nothing); callers appear to guarantee at
        // least two partitions.
        let (target_partition, _) = partition_tensors
            .iter()
            .enumerate()
            .filter_map(|(i, partition_tensor)| {
                if i != source_partition {
                    Some((
                        i,
                        (&shifted_tensor ^ partition_tensor).size() - partition_tensor.size(),
                    ))
                } else {
                    // Don't consider old partition as move target (would be a NOOP)
                    None
                }
            })
            .min_by(|a, b| a.1.total_cmp(&b.1))
            .unwrap();

        // Change partition
        for index in shifted_indices {
            partitioning[index] = target_partition;
        }

        // Recompute the tensors for both partitions: XOR removes the subtree
        // from the source and adds it to the target (inferred from usage).
        partition_tensors[source_partition] ^= &shifted_tensor;
        partition_tensors[target_partition] ^= &shifted_tensor;

        // Recompute the contraction path for both partitions: rebuild the two
        // partition tensors from the updated assignment ...
        let mut from_tensor = Tensor::default();
        let mut to_tensor = Tensor::default();
        for (partition_index, tensor) in zip(&partitioning, self.tensor.tensors()) {
            if *partition_index == source_partition {
                from_tensor.push_tensor(tensor.clone());
            } else if *partition_index == target_partition {
                to_tensor.push_tensor(tensor.clone());
            }
        }

        // ... then run the greedy path finder on each.
        let mut from_opt = Cotengrust::new(&from_tensor, OptMethod::Greedy);
        from_opt.find_path();
        let from_path = from_opt.get_best_replace_path();
        contraction_paths[source_partition] = from_path.into_simple();

        let mut to_opt = Cotengrust::new(&to_tensor, OptMethod::Greedy);
        to_opt.find_path();
        let to_path = to_opt.get_best_replace_path();
        contraction_paths[target_partition] = to_path.into_simple();

        (partitioning, partition_tensors, contraction_paths)
    }

    /// Scores via the shared partitioning evaluation (parallel contraction
    /// cost, or infinity when the memory limit is exceeded).
    fn evaluate<R: Rng>(&self, solution: &Self::SolutionType, rng: &mut R) -> ScoreType {
        evaluate_partitioning(
            self.tensor,
            &solution.0,
            self.communication_scheme,
            self.memory_limit,
            rng,
        )
    }
}
571
572/// Runs simulated annealing to find a better partitioning.
573pub fn balance_partitions<R, M>(
574    model: M,
575    initial_solution: M::SolutionType,
576    rng: &mut R,
577    max_time: Duration,
578) -> (M::SolutionType, ScoreType)
579where
580    R: Rng,
581    M: OptModel,
582{
583    let optimizer = SimulatedAnnealingOptimizer {
584        n_trials: PROCESSING_THREADS,
585        max_time,
586        n_steps: PROCESSING_THREADS * 10,
587        restart_iter: 50,
588        initial_temperature: 2.0,
589        final_temperature: 0.05,
590    };
591    optimizer.optimize_with_temperature(&model, initial_solution, rng)
592}
593
#[cfg(test)]
mod tests {
    use super::*;

    use float_cmp::assert_approx_eq;

    /// `linear_interpolation` hits the endpoints at t = 0 / t = 1 and
    /// interpolates linearly in between (including negative ranges).
    #[test]
    fn simple_linear_interpolation() {
        assert_approx_eq!(f64, linear_interpolation(0., 6., 0.5), 3.0);
        assert_approx_eq!(f64, linear_interpolation(-1.0, 4.0, 0.2), 0.0);
        assert_approx_eq!(f64, linear_interpolation(-7.0, -6.0, 0.0), -7.0);
        assert_approx_eq!(f64, linear_interpolation(3.0, 5.0, 1.0), 5.0);
    }

    /// End-to-end smoke test: annealing on a four-tensor network should settle
    /// on grouping {t1, t3} and {t2, t4} (the pairs that share indices),
    /// starting from the opposite grouping.
    #[test]
    fn small_leaf_partitioning() {
        let t1 = Tensor::new_from_const(vec![0, 1], 2);
        let t2 = Tensor::new_from_const(vec![2, 3], 2);
        let t3 = Tensor::new_from_const(vec![0, 1, 4], 2);
        let t4 = Tensor::new_from_const(vec![2, 3, 4], 2);
        let tn = Tensor::new_composite(vec![t1.clone(), t2.clone(), t3.clone(), t4.clone()]);
        let tn1 = Tensor::new_composite(vec![t1, t2]);
        let tn2 = Tensor::new_composite(vec![t3, t4]);
        let initial_partitioning = vec![0, 0, 1, 1];
        let initial_partitions = vec![tn1, tn2];
        // Fixed seed keeps the annealing run deterministic.
        let mut rng = StdRng::seed_from_u64(42);

        let ((partitioning, _partitions), _) = balance_partitions(
            LeafPartitioningModel {
                tensor: &tn,
                communication_scheme: CommunicationScheme::Greedy,
                memory_limit: None,
            },
            (initial_partitioning, initial_partitions),
            &mut rng,
            Duration::from_secs(2),
        );
        // Normalize for comparability
        let ref_partitioning = if partitioning[0] == 0 {
            [0, 1, 0, 1]
        } else {
            [1, 0, 1, 0]
        };
        assert_eq!(partitioning, ref_partitioning);
    }
}