Nudzi mi się to wymyślam sobie koło na nowo:
package com.task;
import com.google.common.collect.Streams;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.Random;
import java.util.Set;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.stream.Stream;
public class Task {
interface SampleAlgorithm <T extends Collection<?>> {
T sample(T collection, int sampleSize);
String getName();
}
interface SetSampleAlgorithm extends SampleAlgorithm<Set<Integer>> {
Set<Integer> sample(Set<Integer> set, int sampleSize);
String getName();
}
static class ShuffleSampleAlg implements SetSampleAlgorithm {
static final String NAME = "shuffle";
@Override
public Set<Integer> sample(Set<Integer> set, int sampleSize) {
List<Integer> listFromSet = new ArrayList<>(set);
Collections.shuffle(listFromSet);
return listFromSet.stream().limit(sampleSize).collect(Collectors.toSet());
}
@Override
public String getName() {
return NAME;
}
}
static class ArraySampleAlg implements SetSampleAlgorithm {
static final String NAME = "array";
@Override
public Set<Integer> sample(Set<Integer> set, int sampleSize) {
List<Integer> listFromSet = new ArrayList<>(set);
Set<Integer> rndIdx = new Random().ints(sampleSize, 0, set.size()).boxed().collect(Collectors.toSet());
return rndIdx.stream().map(i -> listFromSet.get(i)).collect(Collectors.toSet());
}
@Override
public String getName() {
return NAME;
}
}
static class IterateSampleAlg implements SetSampleAlgorithm {
static final String NAME = "iterate";
@Override
public Set<Integer> sample(Set<Integer> set, int sampleSize) {
Set<Integer> rndIdx = new Random().ints().limit(sampleSize).boxed().collect(Collectors.toSet());
return Streams.mapWithIndex(
set.stream(),
(elem, idx) -> rndIdx.contains(idx) ? elem : null
)
.filter(Objects::nonNull)
.collect(Collectors.toSet());
}
@Override
public String getName() {
return NAME;
}
}
public static void main(String[] args) {
List<Integer> sampleSizes = List.of(9, 999, 99999, 999999);
Set<Integer> set = Stream.iterate(0, i -> i + 1)
.limit(9999999)
.map(i -> new Random().nextInt())
.collect(Collectors.toUnmodifiableSet());
List<SampleAlgorithm<Set<Integer>>> algorithms = List.of(
new ShuffleSampleAlg(),
new ArraySampleAlg(),
new IterateSampleAlg()
);
algorithms.forEach(alg -> sampleSizes.forEach(sampleSize -> sample(set, sampleSize, alg)));
}
private static void sample(Set<Integer> set, int sampleSize, SampleAlgorithm<Set<Integer>> alg) {
sample(
() -> alg.sample(set, sampleSize),
alg.getName(),
sampleSize
);
}
private static void sample(Supplier<Set<Integer>> supp, String name, int sampleSize) {
long start = System.currentTimeMillis();
Set<Integer> sampledSet = supp.get();
long stop = System.currentTimeMillis();
System.err.println(name + " size: " + sampleSize + " score: " + (stop - start));
// to make sampledList used to be sure JIT will not erase getting sampledList
try {
new PrintWriter(File.createTempFile("aaa", "bbb")).print(sampledSet);
} catch (IOException e) {
e.printStackTrace();
}
}
}
średnia z 5 wywołań:
co ciekawe, gdy sample jest bliski całemu setowi samplowanemu szybsza okazuje się iteracja.
macie coś lepszego?