Skip to content

Commit

Permalink
some test failures, but getting closer
Browse files Browse the repository at this point in the history
  • Loading branch information
ztellman committed Aug 22, 2020
1 parent d0c94b5 commit 5da6949
Show file tree
Hide file tree
Showing 24 changed files with 400 additions and 217 deletions.
4 changes: 2 additions & 2 deletions src/io/lacuna/bifurcan/DurableEncodings.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
import java.util.Objects;
import java.util.function.*;

import static io.lacuna.bifurcan.durable.codecs.Util.decodeBlock;
import static io.lacuna.bifurcan.durable.codecs.Util.encodeBlock;
import static io.lacuna.bifurcan.durable.codecs.Core.decodeBlock;
import static io.lacuna.bifurcan.durable.codecs.Core.encodeBlock;

/**
* Utility methods for constructing {@link IDurableEncoding}s.
Expand Down
2 changes: 1 addition & 1 deletion src/io/lacuna/bifurcan/DurableInput.java
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ default String hexBytes() {
}

/**
* @returnb the bounds for this input
* @return the bounds for this input
*/
Bounds bounds();

Expand Down
16 changes: 3 additions & 13 deletions src/io/lacuna/bifurcan/DurableList.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,14 @@
import io.lacuna.bifurcan.durable.Dependencies;
import io.lacuna.bifurcan.durable.Roots;
import io.lacuna.bifurcan.durable.codecs.List;
import io.lacuna.bifurcan.durable.codecs.SkipTable;
import io.lacuna.bifurcan.durable.io.DurableBuffer;
import io.lacuna.bifurcan.durable.io.FileOutput;
import io.lacuna.bifurcan.utils.Iterators;

import java.nio.file.Path;
import java.util.Iterator;

import static io.lacuna.bifurcan.durable.codecs.Util.decodeBlock;
import static io.lacuna.bifurcan.durable.codecs.Core.decodeBlock;

public class DurableList<V> implements IDurableCollection, IList<V> {

Expand All @@ -38,17 +37,8 @@ public static <V> DurableList<V> open(IDurableEncoding.List encoding, Path path)
}

public static <V> DurableList<V> from(Iterator<V> elements, IDurableEncoding.List encoding, Path directory) {
Dependencies.enter();
DurableBuffer acc = new DurableBuffer();
List.encode(elements, encoding, acc);

FileOutput file = new FileOutput(Dependencies.exit(), Map.empty());
DurableOutput out = DurableOutput.from(file);
acc.flushTo(out);
out.close();

Path path = file.moveTo(directory);
return (DurableList<V>) Roots.open(path).decode(encoding);
Fingerprint f = FileOutput.write(directory, Map.empty(), acc -> List.encode(elements, encoding, acc));
return (DurableList<V>) Roots.open(directory, f).decode(encoding);
}

@Override
Expand Down
31 changes: 11 additions & 20 deletions src/io/lacuna/bifurcan/DurableMap.java
Original file line number Diff line number Diff line change
Expand Up @@ -45,26 +45,12 @@ public static <K, V> DurableMap<K, V> open(Path path, IDurableEncoding.Map encod
return (DurableMap<K, V>) Roots.open(path).decode(encoding);
}

public static <K, V> void encode(Iterator<IEntry<K, V>> entries, IDurableEncoding.Map encoding, int maxRealizedEntries, DurableOutput out) {
HashMap.encodeSortedEntries(HashMap.sortEntries(entries, encoding, maxRealizedEntries), encoding, out);
}

public static <K, V> DurableMap<K, V> decode(IDurableEncoding.Map encoding, Root root, DurableInput.Pool pool ) {
return HashMap.decode(encoding, root, pool);
}

public static <K, V> DurableMap<K, V> from(Iterator<IEntry<K, V>> entries, IDurableEncoding.Map encoding, Path directory, int maxRealizedEntries) {
Dependencies.enter();
DurableBuffer acc = new DurableBuffer();
encode(entries, encoding, maxRealizedEntries, acc);

FileOutput file = new FileOutput(Dependencies.exit(), Map.empty());
DurableOutput out = DurableOutput.from(file);
acc.flushTo(out);
out.close();

Path path = file.moveTo(directory);
return (DurableMap<K, V>) Roots.open(path).decode(encoding);
Fingerprint f = FileOutput.write(
directory,
Map.empty(),
acc -> HashMap.encodeSortedEntries(HashMap.sortEntries(entries, encoding, maxRealizedEntries), encoding, acc));
return (DurableMap<K, V>) Roots.open(directory, f).decode(encoding);
}

private Iterator<HashMapEntries> chunkedEntries(long offset) {
Expand Down Expand Up @@ -136,11 +122,16 @@ public IEntry.WithHash<K, V> nth(long idx) {

/**
 * Iterates over the entries of this map by delegating to {@link #hashSortedEntries()}, so the
 * entries are yielded in whatever order that method produces.
 */
@Override
public Iterator<IEntry<K, V>> iterator() {
  Iterator<IEntry.WithHash<K, V>> sorted = hashSortedEntries();
  // raw cast to re-type the iterator; presumably safe because IEntry.WithHash is an IEntry
  return (Iterator) sorted;
}

@Override
public Iterator<IEntry.WithHash<K, V>> hashSortedEntries() {
return Iterators.flatMap(
chunkedEntries(0),
chunk -> Iterators.map(
chunk.entries(0),
e -> IEntry.of((K) e.key(), (V) e.value())));
e -> IEntry.of(e.keyHash(), (K) e.key(), (V) e.value())));
}

@Override
Expand Down
19 changes: 3 additions & 16 deletions src/io/lacuna/bifurcan/ICollection.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,12 @@

import io.lacuna.bifurcan.durable.Dependencies;
import io.lacuna.bifurcan.durable.Roots;
import io.lacuna.bifurcan.durable.codecs.Diffs;
import io.lacuna.bifurcan.durable.codecs.HashMap;
import io.lacuna.bifurcan.durable.codecs.Util;
import io.lacuna.bifurcan.durable.codecs.Core;
import io.lacuna.bifurcan.durable.io.DurableBuffer;
import io.lacuna.bifurcan.durable.io.FileOutput;

import java.nio.file.Path;
import java.util.Iterator;
import java.util.Optional;

/**
* @author ztellman
Expand Down Expand Up @@ -93,17 +90,7 @@ default Iterator<V> iterator() {
C clone();

default C save(IDurableEncoding encoding, Path directory) {
Dependencies.enter();
DurableBuffer acc = new DurableBuffer();

Util.encodeSingleton(this, encoding, acc);

FileOutput file = new FileOutput(Dependencies.exit(), Map.empty());
DurableOutput out = DurableOutput.from(file);
acc.flushTo(out);
out.close();

Path path = file.moveTo(directory);
return (C) Roots.open(path).decode(encoding);
IDurableCollection.Fingerprint f = FileOutput.write(directory, Map.empty(), acc -> Core.encodeSingleton(this, encoding, acc));
return (C) Roots.open(directory, f).decode(encoding);
}
}
68 changes: 43 additions & 25 deletions src/io/lacuna/bifurcan/IDiffMap.java
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
package io.lacuna.bifurcan;

import io.lacuna.bifurcan.diffs.Util;
import io.lacuna.bifurcan.durable.codecs.SkipTable;
import io.lacuna.bifurcan.durable.io.DurableBuffer;
import io.lacuna.bifurcan.utils.Iterators;

import java.util.Comparator;
import java.util.Iterator;
import java.util.OptionalLong;
import java.util.PrimitiveIterator;
Expand Down Expand Up @@ -78,47 +81,62 @@ default Iterator<IEntry<K, V>> iterator() {
added().entries().iterator());
}

static <K, V> IList<IDiffMap<K, V>> diffStack(IDiffMap<K, V> m) {
List<IDiffMap<K, V>> result = new List<IDiffMap<K, V>>().linear();
IDiffMap<K, V> curr = m;
for (;;) {
result.addFirst(curr);
if (curr.underlying() instanceof IDiffMap) {
curr = (IDiffMap<K, V>) curr.underlying();
} else {
break;
}
}
return result;
}
static PrimitiveIterator.OfLong mergedRemovedIndices(IList<IDiffMap<?, ?>> diffStack) {
assert diffStack.stream().allMatch(m -> m instanceof IMap.Durable);

static PrimitiveIterator.OfLong compactedRemovedIndices(IList<IDiffMap<?, ?>> diffStack) {
// isolate the removed indices which only apply to the underlying collection (which is potentially shrinking with
// each new stacked diff)
IList<Iterator<Long>> iterators = new LinearList<>();
long underlyingSize = diffStack.first().underlying().size();
long removed = 0;
for (IDiffMap<?, ?> m : diffStack) {
long underlyingRemainingSize = underlyingSize - removed;
ISortedSet<Long> s = m.removedIndices().slice(0L, underlyingRemainingSize - 1);
long remainingUnderlyingSize = underlyingSize - removed;
ISortedSet<Long> s = m.removedIndices().slice(0L, remainingUnderlyingSize - 1);
iterators.addLast(s.iterator());
removed += s.size();
}

return Util.mergedRemovedIndices(iterators);
}

static <K, V> Iterator<IEntry<K, V>> compactedAddedEntries(IList<IDiffMap<K, V>> diffStack) {
static <K, V> Iterator<IEntry.WithHash<K, V>> mergedAddedEntries(IList<IDiffMap<K, V>> diffStack) {
assert diffStack.stream().allMatch(m -> m instanceof IMap.Durable);

// isolate the removed indices which only apply to the added entries
IList<Iterator<Long>> iterators = new LinearList<>();
long underlyingSize = diffStack.first().underlying().size();
long removed = 0;
for (IDiffMap<?, ?> m : diffStack) {
long underlyingRemainingSize = underlyingSize - removed;
ISortedSet<Long> underlyingIndices = m.removedIndices().slice(0L, underlyingRemainingSize - 1);
ISortedSet<Long> addedIndices = m.removedIndices().slice(underlyingRemainingSize, Long.MAX_VALUE);
iterators.addLast(Iterators.map(addedIndices.iterator(), n -> n - underlyingRemainingSize));
for (IDiffMap<K, V> m : diffStack) {
long remainingUnderlyingSize = underlyingSize - removed;
ISortedSet<Long> underlyingIndices = m.removedIndices().slice(0L, remainingUnderlyingSize - 1);
ISortedSet<Long> addedIndices = m.removedIndices().slice(remainingUnderlyingSize, Long.MAX_VALUE);
iterators.addLast(Iterators.map(addedIndices.iterator(), n -> n - remainingUnderlyingSize));
removed += underlyingIndices.size();
}

PrimitiveIterator.OfLong removedFromAdded = Util.mergedRemovedIndices(iterators);
IList<IEntry<K, V>> added = diffStack.stream().map(m -> m.added().entries()).reduce(Lists::concat).get();
return Util.skipIndices(added.iterator(), removedFromAdded);
SkipTable.Writer writer = new SkipTable.Writer();
Util.mergedRemovedIndices(iterators).forEachRemaining((long idx) -> writer.append(idx, 0));

// for this to consume too much memory would require >100M entries being repeatedly overwritten within the stack
// of diffs, which implies that many entries being in-memory at once, which seems far-fetched enough that I'm not
// going to worry about it for now
// TODO: worry about it
ISortedSet<Long> removedIndices = writer.toOffHeapMap().keys();

// get the hash-sorted entries (which are in the same order as entries() because it's a durable map) from each
// added() and filter out the removed entries from each
IList<Iterator<IEntry.WithHash<K, V>>> sortedEntries = new LinearList<>();
long offset = 0;
for (IDiffMap<K, V> m : diffStack) {
long size = m.added().size();
long currOffset = offset;
sortedEntries.addLast(
Util.skipIndices(
m.added().hashSortedEntries(),
Iterators.map(removedIndices.slice(currOffset, currOffset + size - 1).iterator(), n -> n - currOffset)));
offset += size;
}

return Iterators.mergeSort(sortedEntries, Comparator.comparing(IEntry.WithHash::keyHash));
}
}
54 changes: 44 additions & 10 deletions src/io/lacuna/bifurcan/IDurableCollection.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,22 @@

import io.lacuna.bifurcan.durable.Bytes;
import io.lacuna.bifurcan.durable.Roots;
import io.lacuna.bifurcan.durable.Util;
import io.lacuna.bifurcan.durable.io.BufferInput;
import io.lacuna.bifurcan.durable.io.DurableBuffer;
import io.lacuna.bifurcan.durable.codecs.Core;
import io.lacuna.bifurcan.durable.io.FileOutput;

import java.io.Closeable;
import java.io.File;
import java.nio.ByteBuffer;
import java.nio.file.Path;
import java.util.Iterator;
import java.util.function.Function;

import static io.lacuna.bifurcan.durable.codecs.Util.decodeCollection;
import static io.lacuna.bifurcan.durable.codecs.Core.decodeCollection;

public interface IDurableCollection {

interface Fingerprint extends Comparable<Fingerprint> {
String ALGORITHM = "SHA-512";
int HASH_BYTES = 32;

byte[] binary();

default String toHexString() {
Expand Down Expand Up @@ -64,13 +65,46 @@ default IDurableCollection decode(IDurableEncoding encoding) {
}
}

interface Rebase<T extends IDurableCollection> {
T apply(T collection);
}

IDurableEncoding encoding();

DurableInput.Pool bytes();

Root root();

/**
 * A transformation which takes a durable collection and returns a collection of the same type.
 * An instance of this is what {@code compact} returns.
 */
interface Rebase {
// generic per call, so a single Rebase can be applied to collections of different types
<T extends IDurableCollection> T apply(T collection);
}

/**
 * Re-encodes this collection into a new file in the same directory as its root, within the scope
 * of {@code Core.compacting(compactSet, ...)}, and returns a {@link Rebase} which maps this
 * collection's fingerprint to the newly written one.
 *
 * NOTE(review): the exact effect of {@code Core.compacting} is not visible here; presumably it
 * causes the dependencies in {@code compactSet} to be folded into the new file. Confirm there.
 *
 * @param compactSet the fingerprints to compact; within the dependency graph restricted to this
 *                   set, this collection's fingerprint must be the only top vertex, and every
 *                   member must be reachable from it
 * @return a {@link Rebase} which, when applied to a collection, re-encodes that collection into
 *         its own directory with the old-to-compacted fingerprint mapping and reopens the result
 * @throws IllegalArgumentException if {@code compactSet} contains top vertices other than this
 *                                  collection, or members not reachable from this collection
 */
default Rebase compact(ISet<Fingerprint> compactSet) {
  Fingerprint fingerprint = root().fingerprint();
  DirectedAcyclicGraph<Fingerprint, Void> compactGraph = root().dependencyGraph().select(compactSet);

  // this collection must be the sole top vertex of the selected sub-graph
  ISet<Fingerprint> unexpectedRoots = compactGraph.top().remove(fingerprint);
  if (unexpectedRoots.size() > 0) {
    throw new IllegalArgumentException("unexpected roots in `compactSet`: " + unexpectedRoots);
  }

  // every member of `compactSet` must be reachable from this collection within the sub-graph
  ISet<Fingerprint> reachable = Set.from(Graphs.bfsVertices(fingerprint, compactGraph::out));
  if (reachable.size() < compactSet.size()) {
    throw new IllegalArgumentException("disconnected elements in `compactSet`: " + compactSet.difference(reachable));
  }

  // write the compacted form of this collection next to the existing root file
  Fingerprint compacted = Core.compacting(
      compactSet,
      () -> FileOutput.write(
          root().path().getParent(),
          Map.empty(),
          acc -> Core.encodeSingleton(this, encoding(), acc)));

  IMap<Fingerprint, Fingerprint> rebases = new Map<Fingerprint, Fingerprint>().put(fingerprint, compacted);
  // an anonymous class is required here: `Rebase.apply` is a generic method, which a lambda cannot implement
  return new Rebase() {
    @Override
    public <T extends IDurableCollection> T apply(T c) {
      Path dir = c.root().path().getParent();
      Fingerprint f = FileOutput.write(dir, rebases, acc -> Core.encodeSingleton(c, c.encoding(), acc));
      return (T) Roots.open(dir, f).decode(c.encoding());
    }
  };
}
}
2 changes: 1 addition & 1 deletion src/io/lacuna/bifurcan/IDurableEncoding.java
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ default boolean isSingleton(Object o) {
interface Unityped extends Map, Set, List, Primitive {
@Override
default boolean isSingleton(Object o) {
return o instanceof ICollection;
return o instanceof ICollection || o instanceof java.util.Collection;
}

@Override
Expand Down
6 changes: 1 addition & 5 deletions src/io/lacuna/bifurcan/IMap.java
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
package io.lacuna.bifurcan;

import io.lacuna.bifurcan.diffs.DiffMap;
import io.lacuna.bifurcan.durable.Dependencies;
import io.lacuna.bifurcan.durable.Roots;
import io.lacuna.bifurcan.durable.codecs.Diffs;
import io.lacuna.bifurcan.durable.io.FileOutput;
import io.lacuna.bifurcan.durable.io.DurableBuffer;
import io.lacuna.bifurcan.durable.codecs.HashMap;
import io.lacuna.bifurcan.utils.Iterators;

Expand All @@ -24,6 +19,7 @@ public interface IMap<K, V> extends
Function<K, V> {

/**
 * A map which is also an {@link IDurableCollection}. It redeclares {@code encoding()} with the
 * map-specific return type {@link IDurableEncoding.Map}.
 */
interface Durable<K, V> extends IMap<K,V>, IDurableCollection {
// narrows the return type declared by IDurableCollection#encoding()
IDurableEncoding.Map encoding();
}

/**
Expand Down
2 changes: 1 addition & 1 deletion src/io/lacuna/bifurcan/ISortedSet.java
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ default ISortedSet<V> slice(V min, V max) {
} else {
long minIdx = oMinIdx.getAsLong();
long maxIdx = oMaxIdx.getAsLong();
return Sets.from(elements().slice(minIdx, maxIdx), comparator(), v -> {
return Sets.from(elements().slice(minIdx, maxIdx + 1), comparator(), v -> {
OptionalLong oIdx = floorIndex(v);
if (oIdx.isPresent()) {
long idx = oIdx.getAsLong();
Expand Down
Loading

0 comments on commit 5da6949

Please sign in to comment.