tnc/contractionpath/repartitioning/simulated_annealing.rs

1//! Repartitioning using simulated annealing algorithms.
2
3use std::{
4    iter::zip,
5    time::{Duration, Instant},
6};
7
8use itertools::Itertools;
9use ordered_float::NotNan;
10use rand::{rngs::StdRng, seq::IteratorRandom, Rng, SeedableRng};
11use rayon::iter::{IndexedParallelIterator, IntoParallelRefMutIterator, ParallelIterator};
12use rustc_hash::FxHashSet;
13
14use crate::{
15    contractionpath::{
16        communication_schemes::CommunicationScheme,
17        contraction_cost::{compute_memory_requirements, contract_size_tensors_exact},
18        paths::{
19            cotengrust::{Cotengrust, OptMethod},
20            FindPath,
21        },
22        repartitioning::compute_solution,
23        SimplePath,
24    },
25    tensornetwork::{partitioning::partition_tensor_network, tensor::Tensor},
26};
27
/// Score type used by the optimizer. `NotNan` gives scores a total order so
/// candidate solutions can be compared and minimized safely.
type ScoreType = NotNan<f64>;

/// Number of threads to use for processing candidate solutions in parallel. This is
/// a constant (and not hardware-aware) for reproducibility.
const PROCESSING_THREADS: usize = 48;
33
/// OptModel is a trait that defines requirements to be used with optimization algorithm
pub trait OptModel: Sync + Send {
    /// Type of the Solution
    type SolutionType: Clone + Sync + Send;

    /// Generate a new trial solution from current solution.
    ///
    /// Takes `current_solution` by value so implementations can mutate it in
    /// place without cloning; `rng` is the only source of randomness, keeping
    /// runs reproducible for a fixed seed.
    fn generate_trial_solution<R: Rng>(
        &self,
        current_solution: Self::SolutionType,
        rng: &mut R,
    ) -> Self::SolutionType;

    /// Evaluate the score of the solution (lower is better).
    fn evaluate<R: Rng>(&self, solution: &Self::SolutionType, rng: &mut R) -> ScoreType;
}
49
/// Optimizer that implements the simulated annealing algorithm.
///
/// The temperature schedule is interpolated between `initial_temperature` and
/// `final_temperature` in log-space over the wall-clock time budget.
#[derive(Clone, Copy)]
pub struct SimulatedAnnealingOptimizer {
    /// Number of candidate solutions to generate and evaluate in each iteration.
    n_trials: usize,
    /// Total duration to take for the optimization
    max_time: Duration,
    /// Number of steps to take in each temperature iteration.
    n_steps: usize,
    /// Number of iterations without improvement after which the algorithm should
    /// restart from the best solution found so far.
    restart_iter: usize,
    /// The initial temperature to start the annealing process with.
    initial_temperature: f64,
    /// The final temperature to reach at the end of the annealing process.
    final_temperature: f64,
}
67
/// Linearly interpolates between two numbers based on parameter `t`.
///
/// Computes `start + (end - start) * t`, using a fused multiply-add so the
/// result is rounded only once.
#[inline]
fn linear_interpolation(start: f64, end: f64, t: f64) -> f64 {
    let span = end - start;
    span.mul_add(t, start)
}
75
76impl SimulatedAnnealingOptimizer {
77    /// Start optimization with given temperature range
78    ///
79    /// - `model` : the model to optimize
80    /// - `initial_solution` : the initial solution to start optimization.
81    /// - `n_iter`: maximum iterations
82    #[allow(clippy::too_many_arguments)]
83    fn optimize_with_temperature<M, R>(
84        &self,
85        model: &M,
86        initial_solution: M::SolutionType,
87        rng: &mut R,
88    ) -> (M::SolutionType, ScoreType)
89    where
90        M: OptModel,
91        R: Rng,
92    {
93        let mut current_score = model.evaluate(&initial_solution, rng);
94        let mut current_solution = initial_solution;
95        let mut best_solution = current_solution.clone();
96        let mut best_score = current_score;
97        let mut last_improvement = 0;
98        let steps_per_thread = self.n_steps.div_ceil(self.n_trials);
99
100        let log_start = self.initial_temperature.log2();
101        let log_end = self.final_temperature.log2();
102        let total_seconds = self.max_time.as_secs_f64();
103        let mut temperature = self.initial_temperature;
104        let mut rngs = (0..self.n_trials)
105            .map(|_| StdRng::seed_from_u64(rng.random()))
106            .collect_vec();
107        let end_time = Instant::now() + self.max_time;
108        loop {
109            // Generate and evaluate candidate solutions to find the minimum objective
110            let (_, trial_solution, trial_score) = rngs
111                .par_iter_mut()
112                .enumerate()
113                .map(|(index, rng)| {
114                    let mut trial_score = current_score;
115                    let mut trial_solution = current_solution.clone();
116                    for _ in 0..steps_per_thread {
117                        let solution = model.generate_trial_solution(trial_solution.clone(), rng);
118                        let score = model.evaluate(&solution, rng);
119
120                        let diff = (score / trial_score).log2();
121                        let acceptance_probability = (-diff / temperature).exp();
122                        let random_value = rng.random();
123
124                        if acceptance_probability >= random_value {
125                            trial_solution = solution;
126                            trial_score = score;
127                        }
128                    }
129                    (index, trial_solution, trial_score)
130                })
131                .min_by_key(|(index, _, score)| (*score, *index))
132                .unwrap();
133
134            current_score = trial_score;
135            current_solution = trial_solution;
136
137            // Update the best solution if the current solution is better
138            if current_score < best_score {
139                best_solution = current_solution.clone();
140                best_score = current_score;
141                last_improvement = 0;
142            }
143
144            last_improvement += 1;
145
146            // Check if we should restart from the best solution
147            if last_improvement == self.restart_iter {
148                current_solution = best_solution.clone();
149                current_score = best_score;
150            }
151
152            // Estimate the number of remaining iterations and adapt the temperature
153            let now = Instant::now();
154            if now > end_time {
155                // We've reached the time limit
156                break;
157            }
158            let remaining_time = (end_time - now).as_secs_f64();
159            let progress = 1.0 - remaining_time / total_seconds;
160            temperature = linear_interpolation(log_start, log_end, progress).exp2();
161        }
162
163        (best_solution, best_score)
164    }
165}
166
167/// Common evaluation method for all simulated annealing methods.
168fn evaluate_partitioning<R>(
169    tensor: &Tensor,
170    partitioning: &[usize],
171    communication_scheme: CommunicationScheme,
172    memory_limit: Option<f64>,
173    rng: &mut R,
174) -> NotNan<f64>
175where
176    R: Rng,
177{
178    // Construct the tensor network and contraction path from the partitioning
179    let (partitioned_tn, path, parallel_cost, _) =
180        compute_solution(tensor, partitioning, communication_scheme, Some(rng));
181
182    // If the memory limit is exceeded, return infinity
183    if let Some(limit) = memory_limit {
184        // Compute memory usage
185        let mem = compute_memory_requirements(
186            partitioned_tn.tensors(),
187            &path,
188            contract_size_tensors_exact,
189        );
190
191        if mem > limit {
192            return NotNan::new(f64::INFINITY).unwrap();
193        }
194    }
195    NotNan::new(parallel_cost).unwrap()
196}
197
/// A simulated annealing model that moves a random tensor between random partitions.
pub struct NaivePartitioningModel<'a> {
    /// The tensor network being repartitioned.
    pub tensor: &'a Tensor,
    /// Total number of partitions a tensor can be assigned to.
    pub num_partitions: usize,
    /// Communication scheme used when scoring a partitioning.
    pub communication_scheme: CommunicationScheme,
    /// Optional memory limit; solutions exceeding it score as infinity.
    pub memory_limit: Option<f64>,
}
205
206impl OptModel for NaivePartitioningModel<'_> {
207    type SolutionType = Vec<usize>;
208
209    fn generate_trial_solution<R: Rng>(
210        &self,
211        mut current_solution: Self::SolutionType,
212        rng: &mut R,
213    ) -> Self::SolutionType {
214        let tensor_index = rng.random_range(0..current_solution.len());
215        let current_partition = current_solution[tensor_index];
216        let new_partition = loop {
217            let b = rng.random_range(0..self.num_partitions);
218            if b != current_partition {
219                break b;
220            }
221        };
222        current_solution[tensor_index] = new_partition;
223        current_solution
224    }
225
226    fn evaluate<R: Rng>(&self, solution: &Self::SolutionType, rng: &mut R) -> ScoreType {
227        evaluate_partitioning(
228            self.tensor,
229            solution,
230            self.communication_scheme,
231            self.memory_limit,
232            rng,
233        )
234    }
235}
236
/// A simulated annealing model that moves a random subtree between random partitions.
pub struct NaiveIntermediatePartitioningModel<'a> {
    /// The tensor network being repartitioned.
    pub tensor: &'a Tensor,
    /// Total number of partitions a tensor can be assigned to.
    pub num_partitions: usize,
    /// Communication scheme used when scoring a partitioning.
    pub communication_scheme: CommunicationScheme,
    /// Optional memory limit; solutions exceeding it score as infinity.
    pub memory_limit: Option<f64>,
}
244
impl OptModel for NaiveIntermediatePartitioningModel<'_> {
    /// A partition index per leaf tensor, plus one contraction path per
    /// partition (kept in sync with the assignment).
    type SolutionType = (Vec<usize>, Vec<SimplePath>);

    /// Moves a random contraction subtree from a source partition to a random
    /// target partition, then recomputes greedy contraction paths for the two
    /// affected partitions.
    fn generate_trial_solution<R: Rng>(
        &self,
        current_solution: Self::SolutionType,
        rng: &mut R,
    ) -> Self::SolutionType {
        let (mut partitioning, mut contraction_paths) = current_solution;

        // Select source partition (with more than one tensor)
        // NOTE(review): the filter requires at least three contraction pairs;
        // presumably this keeps the source partition non-trivial after the
        // subtree is removed — confirm against `SimplePath` conventions.
        let source_partition = contraction_paths
            .iter()
            .enumerate()
            .filter_map(|(contraction_id, contraction)| {
                if contraction.len() >= 3 {
                    Some(contraction_id)
                } else {
                    None
                }
            })
            .choose(rng);

        let Some(source_partition) = source_partition else {
            // No viable partitions, return the current solution
            return (partitioning, contraction_paths);
        };

        // Select random tensor contraction in source partition. The last pair
        // is excluded — presumably so the moved subtree is a strict subset of
        // the partition; TODO confirm.
        let pair_index = rng.random_range(0..contraction_paths[source_partition].len() - 1);
        let (i, j) = contraction_paths[source_partition][pair_index];
        let mut tensor_leaves = FxHashSet::from_iter([i, j]);

        // Gather all tensors that contribute to the selected contraction by
        // walking the path backwards from the chosen pair.
        // NOTE(review): only the first operand `i` is looked up; this assumes
        // the replace-path convention where a pair's result is stored back at
        // index `i` — TODO confirm.
        for (i, j) in contraction_paths[source_partition]
            .iter()
            .take(pair_index)
            .rev()
        {
            if tensor_leaves.contains(i) {
                tensor_leaves.insert(*j);
            }
        }

        // Map partition-local indices in `tensor_leaves` back to the global
        // tensor indices used by `partitioning`.
        let mut shifted_indices = Vec::with_capacity(tensor_leaves.len());
        for (partition_tensor_index, (i, _partition)) in partitioning
            .iter()
            .enumerate()
            .filter(|(_, partition)| *partition == &source_partition)
            .enumerate()
        {
            if tensor_leaves.contains(&partition_tensor_index) {
                shifted_indices.push(i);
            }
        }

        // Select random target partition.
        // NOTE(review): this rejection loop never terminates when
        // `num_partitions < 2`; callers appear to guarantee at least two
        // partitions.
        let target_partition = loop {
            let b = rng.random_range(0..self.num_partitions);
            if b != source_partition {
                break b;
            }
        };

        // Change partition
        for index in shifted_indices {
            partitioning[index] = target_partition;
        }

        // Recompute the contraction path for both partitions: rebuild the two
        // partition tensors from the updated assignment ...
        let mut from_tensor = Tensor::default();
        let mut to_tensor = Tensor::default();
        for (partition_index, tensor) in zip(&partitioning, self.tensor.tensors()) {
            if *partition_index == source_partition {
                from_tensor.push_tensor(tensor.clone());
            } else if *partition_index == target_partition {
                to_tensor.push_tensor(tensor.clone());
            }
        }

        // ... then run the greedy path finder on each.
        let mut from_opt = Cotengrust::new(&from_tensor, OptMethod::Greedy);
        from_opt.find_path();
        let from_path = from_opt.get_best_replace_path();
        contraction_paths[source_partition] = from_path.into_simple();

        let mut to_opt = Cotengrust::new(&to_tensor, OptMethod::Greedy);
        to_opt.find_path();
        let to_path = to_opt.get_best_replace_path();
        contraction_paths[target_partition] = to_path.into_simple();

        (partitioning, contraction_paths)
    }

    /// Scores via the shared partitioning evaluation (parallel contraction
    /// cost, or infinity when the memory limit is exceeded).
    fn evaluate<R: Rng>(&self, solution: &Self::SolutionType, rng: &mut R) -> ScoreType {
        evaluate_partitioning(
            self.tensor,
            &solution.0,
            self.communication_scheme,
            self.memory_limit,
            rng,
        )
    }
}
348
/// A simulated annealing model that moves a random tensor to the partition that
/// maximizes memory reduction.
pub struct LeafPartitioningModel<'a> {
    /// The tensor network being repartitioned.
    pub tensor: &'a Tensor,
    /// Communication scheme used when scoring a partitioning.
    pub communication_scheme: CommunicationScheme,
    /// Optional memory limit; solutions exceeding it score as infinity.
    pub memory_limit: Option<f64>,
}
356
impl OptModel for LeafPartitioningModel<'_> {
    /// A partition index per leaf tensor, plus the combined tensor of each
    /// partition (cached so move costs can be evaluated cheaply).
    type SolutionType = (Vec<usize>, Vec<Tensor>);

    /// Moves a single random tensor to the partition whose size grows the least
    /// (equivalently, shrinks the most), keeping the cached partition tensors
    /// in sync.
    fn generate_trial_solution<R: Rng>(
        &self,
        current_solution: Self::SolutionType,
        rng: &mut R,
    ) -> Self::SolutionType {
        let (mut partitioning, mut partition_tensors) = current_solution;
        let tensor_index = rng.random_range(0..partitioning.len());
        let shifted_tensor = self.tensor.tensor(tensor_index);
        let source_partition = partitioning[tensor_index];

        // Pick the target minimizing the size delta
        // `|partition ^ tensor| - |partition|` caused by the move.
        // NOTE(review): `unwrap` panics when there is only one partition (the
        // filter below then yields nothing); callers appear to guarantee at
        // least two partitions.
        let (new_partition, _) = partition_tensors
            .iter()
            .enumerate()
            .filter_map(|(i, partition_tensor)| {
                if i != source_partition {
                    Some((
                        i,
                        (shifted_tensor ^ partition_tensor).size() - partition_tensor.size(),
                    ))
                } else {
                    // Don't consider old partition as move target (would be a NOOP)
                    None
                }
            })
            .min_by(|a, b| a.1.total_cmp(&b.1))
            .unwrap();

        partitioning[tensor_index] = new_partition;
        // `^=` updates the cached partition tensors: the moved tensor is
        // removed from the source and added to the target (symmetric-
        // difference-like semantics of `Tensor`'s XOR — inferred from usage).
        partition_tensors[source_partition] ^= shifted_tensor;
        partition_tensors[new_partition] ^= shifted_tensor;
        (partitioning, partition_tensors)
    }

    /// Scores via the shared partitioning evaluation (parallel contraction
    /// cost, or infinity when the memory limit is exceeded).
    fn evaluate<R: Rng>(&self, solution: &Self::SolutionType, rng: &mut R) -> ScoreType {
        evaluate_partitioning(
            self.tensor,
            &solution.0,
            self.communication_scheme,
            self.memory_limit,
            rng,
        )
    }
}
403
/// A simulated annealing model that moves a random subtree to the partition that
/// maximizes memory reduction.
pub struct IntermediatePartitioningModel<'a> {
    /// The tensor network being repartitioned.
    pub tensor: &'a Tensor,
    /// Communication scheme used when scoring a partitioning.
    pub communication_scheme: CommunicationScheme,
    /// Optional memory limit; solutions exceeding it score as infinity.
    pub memory_limit: Option<f64>,
}
411
412impl IntermediatePartitioningModel<'_> {
413    pub fn compute_initial_solution(
414        &self,
415        initial_partitioning: &[usize],
416        initial_contraction_paths: Option<Vec<SimplePath>>,
417    ) -> <IntermediatePartitioningModel<'_> as OptModel>::SolutionType {
418        let partitioned_tn = partition_tensor_network(self.tensor.clone(), initial_partitioning);
419
420        // Get the partition tensors
421        let partition_tensors = partitioned_tn
422            .tensors()
423            .iter()
424            .map(Tensor::external_tensor)
425            .collect_vec();
426
427        let contraction_paths = initial_contraction_paths.unwrap_or_else(|| {
428            // Find contraction paths using Greedy
429            partitioned_tn
430                .tensors()
431                .iter()
432                .map(|t| {
433                    // Find path for this partition
434                    let mut opt = Cotengrust::new(t, OptMethod::Greedy);
435                    opt.find_path();
436                    let path = opt.get_best_replace_path();
437                    path.into_simple()
438                })
439                .collect()
440        });
441
442        // Find a contraction path for the partitioned tensor network
443        (
444            initial_partitioning.to_vec(),
445            partition_tensors,
446            contraction_paths,
447        )
448    }
449}
450
impl OptModel for IntermediatePartitioningModel<'_> {
    /// A partition index per leaf tensor, the combined tensor of each
    /// partition, and one contraction path per partition.
    type SolutionType = (Vec<usize>, Vec<Tensor>, Vec<SimplePath>);

    /// Moves a random contraction subtree to the partition with the smallest
    /// resulting size increase, then updates the cached partition tensors and
    /// recomputes the contraction paths of both affected partitions.
    fn generate_trial_solution<R: Rng>(
        &self,
        current_solution: Self::SolutionType,
        rng: &mut R,
    ) -> Self::SolutionType {
        let (mut partitioning, mut partition_tensors, mut contraction_paths) = current_solution;

        // Select source partition (with more than one tensor)
        // NOTE(review): the filter requires at least three contraction pairs;
        // presumably this keeps the source partition non-trivial after the
        // subtree is removed — confirm against `SimplePath` conventions.
        let source_partition = contraction_paths
            .iter()
            .enumerate()
            .filter_map(|(contraction_id, contraction)| {
                if contraction.len() >= 3 {
                    Some(contraction_id)
                } else {
                    None
                }
            })
            .choose(rng);

        let Some(source_partition) = source_partition else {
            // No viable partitions, return the current solution
            return (partitioning, partition_tensors, contraction_paths);
        };

        // Select random tensor contraction in source partition. The last pair
        // is excluded — presumably so the moved subtree is a strict subset of
        // the partition; TODO confirm.
        let pair_index = rng.random_range(0..contraction_paths[source_partition].len() - 1);
        let (i, j) = contraction_paths[source_partition][pair_index];
        let mut tensor_leaves = FxHashSet::from_iter([i, j]);

        // Gather all tensors that contribute to the selected contraction by
        // walking the path backwards from the chosen pair.
        // NOTE(review): only the first operand `i` is looked up; this assumes
        // the replace-path convention where a pair's result is stored back at
        // index `i` — TODO confirm.
        for (i, j) in contraction_paths[source_partition]
            .iter()
            .take(pair_index)
            .rev()
        {
            if tensor_leaves.contains(i) {
                tensor_leaves.insert(*j);
            }
        }

        // Map partition-local indices in `tensor_leaves` back to global tensor
        // indices, accumulating the combined tensor being moved along the way.
        let mut shifted_tensor = Tensor::default();
        let mut shifted_indices = Vec::with_capacity(tensor_leaves.len());
        for (partition_tensor_index, (i, _partition)) in partitioning
            .iter()
            .enumerate()
            .filter(|(_, partition)| *partition == &source_partition)
            .enumerate()
        {
            if tensor_leaves.contains(&partition_tensor_index) {
                shifted_tensor ^= self.tensor.tensor(i);
                shifted_indices.push(i);
            }
        }

        // Find best target partition: the one minimizing the size delta
        // `|partition ^ subtree| - |partition|`.
        // Cost function is actually quite important!!
        // NOTE(review): `unwrap` panics when there is only one partition (the
        // filter below then yields nothing); callers appear to guarantee at
        // least two partitions.
        let (target_partition, _) = partition_tensors
            .iter()
            .enumerate()
            .filter_map(|(i, partition_tensor)| {
                if i != source_partition {
                    Some((
                        i,
                        (&shifted_tensor ^ partition_tensor).size() - partition_tensor.size(),
                    ))
                } else {
                    // Don't consider old partition as move target (would be a NOOP)
                    None
                }
            })
            .min_by(|a, b| a.1.total_cmp(&b.1))
            .unwrap();

        // Change partition
        for index in shifted_indices {
            partitioning[index] = target_partition;
        }

        // Recompute the tensors for both partitions: XOR removes the subtree
        // from the source and adds it to the target (inferred from usage).
        partition_tensors[source_partition] ^= &shifted_tensor;
        partition_tensors[target_partition] ^= &shifted_tensor;

        // Recompute the contraction path for both partitions: rebuild the two
        // partition tensors from the updated assignment ...
        let mut from_tensor = Tensor::default();
        let mut to_tensor = Tensor::default();
        for (partition_index, tensor) in zip(&partitioning, self.tensor.tensors()) {
            if *partition_index == source_partition {
                from_tensor.push_tensor(tensor.clone());
            } else if *partition_index == target_partition {
                to_tensor.push_tensor(tensor.clone());
            }
        }

        // ... then run the greedy path finder on each.
        let mut from_opt = Cotengrust::new(&from_tensor, OptMethod::Greedy);
        from_opt.find_path();
        let from_path = from_opt.get_best_replace_path();
        contraction_paths[source_partition] = from_path.into_simple();

        let mut to_opt = Cotengrust::new(&to_tensor, OptMethod::Greedy);
        to_opt.find_path();
        let to_path = to_opt.get_best_replace_path();
        contraction_paths[target_partition] = to_path.into_simple();

        (partitioning, partition_tensors, contraction_paths)
    }

    /// Scores via the shared partitioning evaluation (parallel contraction
    /// cost, or infinity when the memory limit is exceeded).
    fn evaluate<R: Rng>(&self, solution: &Self::SolutionType, rng: &mut R) -> ScoreType {
        evaluate_partitioning(
            self.tensor,
            &solution.0,
            self.communication_scheme,
            self.memory_limit,
            rng,
        )
    }
}
571
572/// Runs simulated annealing to find a better partitioning.
573pub fn balance_partitions<R, M>(
574    model: M,
575    initial_solution: M::SolutionType,
576    rng: &mut R,
577    max_time: Duration,
578) -> (M::SolutionType, ScoreType)
579where
580    R: Rng,
581    M: OptModel,
582{
583    let optimizer = SimulatedAnnealingOptimizer {
584        n_trials: PROCESSING_THREADS,
585        max_time,
586        n_steps: PROCESSING_THREADS * 10,
587        restart_iter: 50,
588        initial_temperature: 2.0,
589        final_temperature: 0.05,
590    };
591    optimizer.optimize_with_temperature(&model, initial_solution, rng)
592}
593
#[cfg(test)]
mod tests {
    use super::*;

    use float_cmp::assert_approx_eq;

    /// `linear_interpolation` hits the endpoints at t = 0 / t = 1 and
    /// interpolates linearly in between (including negative ranges).
    #[test]
    fn simple_linear_interpolation() {
        assert_approx_eq!(f64, linear_interpolation(0., 6., 0.5), 3.0);
        assert_approx_eq!(f64, linear_interpolation(-1.0, 4.0, 0.2), 0.0);
        assert_approx_eq!(f64, linear_interpolation(-7.0, -6.0, 0.0), -7.0);
        assert_approx_eq!(f64, linear_interpolation(3.0, 5.0, 1.0), 5.0);
    }

    /// End-to-end smoke test: annealing on a four-tensor network should settle
    /// on grouping {t1, t3} and {t2, t4} (the pairs that share indices),
    /// starting from the opposite grouping.
    #[test]
    fn small_leaf_partitioning() {
        let t1 = Tensor::new_from_const(vec![0, 1], 2);
        let t2 = Tensor::new_from_const(vec![2, 3], 2);
        let t3 = Tensor::new_from_const(vec![0, 1, 4], 2);
        let t4 = Tensor::new_from_const(vec![2, 3, 4], 2);
        let tn = Tensor::new_composite(vec![t1.clone(), t2.clone(), t3.clone(), t4.clone()]);
        let tn1 = Tensor::new_composite(vec![t1, t2]);
        let tn2 = Tensor::new_composite(vec![t3, t4]);
        let initial_partitioning = vec![0, 0, 1, 1];
        let initial_partitions = vec![tn1, tn2];
        // Fixed seed keeps the annealing run deterministic.
        let mut rng = StdRng::seed_from_u64(42);

        let ((partitioning, _partitions), _) = balance_partitions(
            LeafPartitioningModel {
                tensor: &tn,
                communication_scheme: CommunicationScheme::Greedy,
                memory_limit: None,
            },
            (initial_partitioning, initial_partitions),
            &mut rng,
            Duration::from_secs(2),
        );
        // Normalize for comparability
        let ref_partitioning = if partitioning[0] == 0 {
            [0, 1, 0, 1]
        } else {
            [1, 0, 1, 0]
        };
        assert_eq!(partitioning, ref_partitioning);
    }
}