diff --git a/gorscripts/src/test/java/org/gorpipe/gor/cli/link/LinkCommandTest.java b/gorscripts/src/test/java/org/gorpipe/gor/cli/link/UTestLinkCommand.java similarity index 99% rename from gorscripts/src/test/java/org/gorpipe/gor/cli/link/LinkCommandTest.java rename to gorscripts/src/test/java/org/gorpipe/gor/cli/link/UTestLinkCommand.java index 226ea9af..c4d7aa65 100644 --- a/gorscripts/src/test/java/org/gorpipe/gor/cli/link/LinkCommandTest.java +++ b/gorscripts/src/test/java/org/gorpipe/gor/cli/link/UTestLinkCommand.java @@ -16,7 +16,7 @@ import picocli.CommandLine; -public class LinkCommandTest { +public class UTestLinkCommand { @Rule public TemporaryFolder temp = new TemporaryFolder(); diff --git a/gortools/src/main/java/org/gorpipe/gor/cli/link/LinkCommand.java b/gortools/src/main/java/org/gorpipe/gor/cli/link/LinkCommand.java index 483150fd..bd00adbd 100644 --- a/gortools/src/main/java/org/gorpipe/gor/cli/link/LinkCommand.java +++ b/gortools/src/main/java/org/gorpipe/gor/cli/link/LinkCommand.java @@ -6,7 +6,7 @@ @SuppressWarnings("squid:S106") @CommandLine.Command(name = "link", - description = "Manage link files (create, update, rollback, list).", + description = "Manage link files (create, update, resolve, rollback, list).", header = "Link file management commands.", subcommands = {LinkUpdateCommand.class, LinkRollbackCommand.class, LinkResolveCommand.class, LinkListCommand.class}) public class LinkCommand extends HelpOptions implements Runnable { diff --git a/gortools/src/main/java/org/gorpipe/gor/cli/link/LinkResolveCommand.java b/gortools/src/main/java/org/gorpipe/gor/cli/link/LinkResolveCommand.java index 223d3300..d297b3f8 100644 --- a/gortools/src/main/java/org/gorpipe/gor/cli/link/LinkResolveCommand.java +++ b/gortools/src/main/java/org/gorpipe/gor/cli/link/LinkResolveCommand.java @@ -24,6 +24,10 @@ public class LinkResolveCommand implements Runnable { description = "Return the full link file entry instead of only the resolved URL.") private 
boolean returnFullEntry; + @CommandLine.Option(names = {"-i", "--info-only"}, + description = "Return the link entry info only.") + private boolean returnInfoOnly; + @CommandLine.ParentCommand private LinkCommand mainCmd; @@ -41,8 +45,10 @@ public void run() { String output; if (returnFullEntry) { output = entry.format().replace('\t', ' '); + } else if (returnInfoOnly) { + output = entry.info(); } else { - var resolved = linkFile.getEntryUrl(timestamp); + var resolved = linkFile.getUrlFromEntry(entry); if (Strings.isNullOrEmpty(resolved)) { throw new CommandLine.ParameterException(new CommandLine(this), "No link entry found for the requested time."); diff --git a/gortools/src/main/java/org/gorpipe/gor/cli/link/LinkRollbackCommand.java b/gortools/src/main/java/org/gorpipe/gor/cli/link/LinkRollbackCommand.java index 04f8c176..92974cff 100644 --- a/gortools/src/main/java/org/gorpipe/gor/cli/link/LinkRollbackCommand.java +++ b/gortools/src/main/java/org/gorpipe/gor/cli/link/LinkRollbackCommand.java @@ -1,6 +1,7 @@ package org.gorpipe.gor.cli.link; import org.gorpipe.gor.driver.linkfile.LinkFile; +import org.gorpipe.gor.model.DriverBackedFileReader; import org.gorpipe.util.DateUtils; import picocli.CommandLine; @@ -31,7 +32,7 @@ public void run() { throw new CommandLine.ParameterException(new CommandLine(this), "No entries were removed. 
Link file may already be at the requested state."); } - linkFile.save(); + linkFile.save(new DriverBackedFileReader(mainCmd.getSecurityContext(), mainCmd.getProjectRoot())); System.err.printf("Rolled back link file %s%n", normalizedLinkPath); } catch (IOException e) { throw new CommandLine.ExecutionException(new CommandLine(this), diff --git a/gortools/src/main/java/org/gorpipe/gor/cli/link/LinkUpdateCommand.java b/gortools/src/main/java/org/gorpipe/gor/cli/link/LinkUpdateCommand.java index 2aa7a34b..5a32128c 100644 --- a/gortools/src/main/java/org/gorpipe/gor/cli/link/LinkUpdateCommand.java +++ b/gortools/src/main/java/org/gorpipe/gor/cli/link/LinkUpdateCommand.java @@ -1,6 +1,8 @@ package org.gorpipe.gor.cli.link; import org.gorpipe.gor.driver.linkfile.LinkFile; +import org.gorpipe.gor.model.DriverBackedFileReader; +import org.gorpipe.gor.util.StringUtil; import org.gorpipe.util.Strings; import picocli.CommandLine; @@ -39,10 +41,11 @@ public class LinkUpdateCommand implements Runnable { public void run() { var normalizedLinkPath = LinkFile.validateAndUpdateLinkFileName(linkFilePath); try { + var reader = new DriverBackedFileReader(mainCmd.getSecurityContext(), mainCmd.getProjectRoot()); var linkFile = LinkFile.loadV1(LinkStreamSourceProvider.resolve(normalizedLinkPath, mainCmd.getSecurityContext(), mainCmd.getProjectRoot(), true, this)); applyHeaders(linkFile); - linkFile.appendEntry(linkValue, entryMd5, entryInfo); - linkFile.save(); + linkFile.appendEntry(linkValue, entryMd5, StringUtil.trimQuotes(entryInfo), reader); + linkFile.save(reader); System.err.printf("Updated link file %s with %s%n", normalizedLinkPath, linkValue); } catch (IOException e) { throw new CommandLine.ExecutionException(new CommandLine(this), diff --git a/gortools/src/test/java/gorsat/Inputs/UTestLink.java b/gortools/src/test/java/org/gorpipe/gor/cli/link/UTestLinkCommandExec.java similarity index 50% rename from gortools/src/test/java/gorsat/Inputs/UTestLink.java rename to 
gortools/src/test/java/org/gorpipe/gor/cli/link/UTestLinkCommandExec.java index 9026be77..15c70c09 100644 --- a/gortools/src/test/java/gorsat/Inputs/UTestLink.java +++ b/gortools/src/test/java/org/gorpipe/gor/cli/link/UTestLinkCommandExec.java @@ -1,4 +1,4 @@ -package gorsat.Inputs; +package org.gorpipe.gor.cli.link; import gorsat.Commands.CommandParseUtilities; import gorsat.TestUtils; @@ -10,12 +10,14 @@ import org.junit.rules.TemporaryFolder; import java.nio.file.Path; +import java.nio.file.Files; import java.time.Instant; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertFalse; -public class UTestLink { +public class UTestLinkCommandExec { @Rule public TemporaryFolder temp = new TemporaryFolder(); @@ -73,7 +75,7 @@ public void testUpdateWithMd5AndInfo() throws Exception { LinkFile link = LinkFile.load(new FileSource(linkFile)); var latest = link.getLatestEntry(); assertEquals("abc123", latest.md5()); - assertEquals("'first entry'", latest.info()); + assertEquals("first entry", latest.info()); } @Test @@ -152,7 +154,146 @@ public void testResolveFullEntry() throws Exception { assertEquals(expectedEntry.replace('\t', ' '), CommandParseUtilities.quoteSafeSplit(res.split("\n")[1], '\t')[2]); } + @Test + public void testResolveEntryInfoOnly() throws Exception { + Path linkFile = temp.getRoot().toPath().resolve("resolve_full.gor.link"); + + TestUtils.runGorPipe("exec gor link update " + linkFile.toString() + " data/file1.gor -i 'info more'"); + String expectedEntry = LinkFile.load(new FileSource(linkFile)).getLatestEntry().format(); + String res = TestUtils.runGorPipe("exec gor link resolve " + linkFile.toString() + " -i"); + + assertEquals("ChromNor\tPosNor\tcol1\nchrN\t0\tinfo more\n", res); + } + private String resolve(Path linkFile, String relative) { return linkFile.getParent().resolve(relative).toAbsolutePath().normalize().toString(); } + + @Test + public void 
testLinkLimitedByNumberEntries() throws Exception { + Path linkFile = temp.getRoot().toPath().resolve("limit_count.gor.link"); + Path file1 = temp.newFile("file1.gor").toPath(); + Path file2 = temp.newFile("file2.gor").toPath(); + Path file3 = temp.newFile("file3.gor").toPath(); + + // Create link with max 2 entries + TestUtils.runGorPipe("exec gor link update " + linkFile.toString() + " " + file1.toString() + " -h ENTRIES_COUNT_MAX=2"); + Thread.sleep(10); + TestUtils.runGorPipe("exec gor link update " + linkFile.toString() + " " + file2.toString()); + Thread.sleep(10); + TestUtils.runGorPipe("exec gor link update " + linkFile.toString() + " " + file3.toString()); + + LinkFile link = LinkFile.load(new FileSource(linkFile)); + assertEquals(2, link.getEntriesCount()); + // The entries are sorted oldest first. So index 0 is file2, index 1 is file3. + assertEquals(resolve(linkFile, file2.getFileName().toString()), link.getEntries().get(0).url()); + assertEquals(resolve(linkFile, file3.getFileName().toString()), link.getEntries().get(1).url()); + } + + @Test + public void testLinkLimitedByTimestamp() throws Exception { + Path linkFile = temp.getRoot().toPath().resolve("limit_age.gor.link"); + Path file1 = temp.newFile("file1.gor").toPath(); + Path file2 = temp.newFile("file2.gor").toPath(); + + // Max age 200ms + TestUtils.runGorPipe("exec gor link update " + linkFile.toString() + " " + file1.toString() + " -h ENTRIES_AGE_MAX=200"); + + Thread.sleep(300); // Wait > 200ms + + TestUtils.runGorPipe("exec gor link update " + linkFile.toString() + " " + file2.toString()); + + LinkFile link = LinkFile.load(new FileSource(linkFile)); + // file1 should be expired. 
+ assertEquals(1, link.getEntriesCount()); + assertEquals(resolve(linkFile, file2.getFileName().toString()), link.getLatestEntryUrl()); + } + + @Test + public void testLinkLimitedBySize() throws Exception { + Path linkFile = temp.getRoot().toPath().resolve("limit_size.gor.link"); + + // Construct a large info string (~4KB) + StringBuilder largeInfo = new StringBuilder(); + for(int i=0; i<4000; i++) largeInfo.append("a"); + + Path file1 = temp.newFile("file1.gor").toPath(); + Path file2 = temp.newFile("file2.gor").toPath(); + Path file3 = temp.newFile("file3.gor").toPath(); + + // Use LinkFile API directly to allow large inputs easily and control saving + LinkFile link = LinkFile.create(new FileSource(linkFile), ""); + link.setEntriesCountMax(100); + + // Add entries. Total size will exceed default 10000 bytes. + link.appendEntry(file1.toString(), "md5_1", largeInfo.toString()); + link.save(null); + + link.appendEntry(file2.toString(), "md5_2", largeInfo.toString()); + link.save(null); + + link.appendEntry(file3.toString(), "md5_3", largeInfo.toString()); + link.save(null); + + LinkFile reload = LinkFile.load(new FileSource(linkFile)); + // Should have dropped oldest entries to stay under size limit. + // 3 entries * 4KB > 10KB. Expect < 3 entries. 
+ assertTrue("Link file should have less than 3 entries due to size limit", reload.getEntriesCount() < 3); + assertEquals(file3.toString(), reload.getLatestEntryUrl()); + } + + @Test + public void testGarbageCollectionManaged() throws Exception { + Path linkFile = temp.getRoot().toPath().resolve("gc_managed.gor.link"); + Path file1 = temp.newFile("file1.gor").toPath(); + Path file2 = temp.newFile("file2.gor").toPath(); + Path file3 = temp.newFile("file3.gor").toPath(); + + Files.write(file1, "data1".getBytes()); + Files.write(file2, "data2".getBytes()); + Files.write(file3, "data3".getBytes()); + + // ENTRIES_COUNT_MAX=2, DATA_LIFECYCLE_MANAGED=true + TestUtils.runGorPipe("exec gor link update " + linkFile.toString() + " " + file1.toString() + " -h ENTRIES_COUNT_MAX=2 -h DATA_LIFECYCLE_MANAGED=true"); + Thread.sleep(10); + TestUtils.runGorPipe("exec gor link update " + linkFile.toString() + " " + file2.toString()); + + assertTrue(file1.toFile().exists()); + + Thread.sleep(10); + // This update pushes file1 out. 
+ TestUtils.runGorPipe("exec gor link update " + linkFile.toString() + " " + file3.toString()); + + // Wait for async GC + long start = System.currentTimeMillis(); + while(file1.toFile().exists() && System.currentTimeMillis() - start < 5000) { + Thread.sleep(50); + } + + assertFalse("File1 should be deleted (Managed GC)", file1.toFile().exists()); + assertTrue("File2 should exist", file2.toFile().exists()); + assertTrue("File3 should exist", file3.toFile().exists()); + } + + @Test + public void testGarbageCollectionUnmanaged() throws Exception { + Path linkFile = temp.getRoot().toPath().resolve("gc_unmanaged.gor.link"); + Path file1 = temp.newFile("file1.gor").toPath(); + Path file2 = temp.newFile("file2.gor").toPath(); + Path file3 = temp.newFile("file3.gor").toPath(); + + // ENTRIES_COUNT_MAX=2, DATA_LIFECYCLE_MANAGED=false + TestUtils.runGorPipe("exec gor link update " + linkFile.toString() + " " + file1.toString() + " -h ENTRIES_COUNT_MAX=2 -h DATA_LIFECYCLE_MANAGED=false"); + Thread.sleep(10); + TestUtils.runGorPipe("exec gor link update " + linkFile.toString() + " " + file2.toString()); + Thread.sleep(10); + TestUtils.runGorPipe("exec gor link update " + linkFile.toString() + " " + file3.toString()); + + // file1 pushed out. Should NOT be deleted. 
+ Thread.sleep(500); + + assertTrue("File1 should exist (Unmanaged GC)", file1.toFile().exists()); + assertTrue("File2 should exist", file2.toFile().exists()); + assertTrue("File3 should exist", file3.toFile().exists()); + } } diff --git a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java index d499afae..b0bee8d0 100644 --- a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java +++ b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFile.java @@ -3,6 +3,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.util.ArrayList; import java.util.List; import java.util.concurrent.TimeUnit; import com.github.benmanes.caffeine.cache.Cache; @@ -13,11 +14,14 @@ import org.gorpipe.gor.driver.meta.SourceReference; import org.gorpipe.gor.driver.providers.stream.StreamUtils; import org.gorpipe.gor.driver.providers.stream.sources.StreamSource; +import org.gorpipe.gor.model.DriverBackedFileReader; import org.gorpipe.gor.model.FileReader; import org.gorpipe.gor.table.util.PathUtils; import org.gorpipe.gor.util.DataUtil; import org.gorpipe.util.Strings; +import static org.gorpipe.gor.driver.linkfile.LinkFileMeta.HEADER_DATA_LIFECYCLE_MANAGED_KEY; + /** * Class to work with link files, read, write and access metadata. * @@ -54,12 +58,13 @@ public abstract class LinkFile { private static final org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger(LinkFile.class); - public static final int LINK_FILE_MAX_SIZE = 10000; - + // Approx max size of link file content to read or write. Stop adding lines if exceeded. Don't load if twice this size.
+ public static final int LINK_FILE_MAX_SIZE = Integer.parseInt(System.getProperty("gor.driver.link.maxfilesize", "10000")); private static final boolean USE_LINK_CACHE = Boolean.parseBoolean(System.getProperty("gor.driver.link.cache", "true")); + private static final Cache linkCache = Caffeine.newBuilder() .maximumSize(10000) - .expireAfterWrite(2, TimeUnit.HOURS).build(); + .expireAfterWrite(15, TimeUnit.MINUTES).build(); public static LinkFile load(StreamSource source) throws IOException { var content = loadContentFromSource(source); @@ -112,8 +117,6 @@ public static String validateAndUpdateLinkFileName(String linkFilePath) { } } - - protected final StreamSource source; protected final LinkFileMeta meta; protected final List entries; // Entries sorted by time (oldest first) @@ -145,7 +148,7 @@ public String getLatestEntryUrl() { return getUrlFromEntry(getLatestEntry()); } - private String getUrlFromEntry(LinkFileEntry entry) { + public String getUrlFromEntry(LinkFileEntry entry) { var linkUrl = entry != null ? 
entry.url() : null; if (linkUrl != null && !PathUtils.isAbsolutePath(linkUrl) && this.source != null) { // Allow relative links: @@ -166,7 +169,7 @@ protected String getHeader() { return meta.formatHeader(); } - List getEntries() { + public List getEntries() { return entries; } @@ -222,7 +225,7 @@ public LinkFile appendEntry(String link, String md5) { } public LinkFile appendEntry(String link, String md5, String info) { - return appendEntry(link, md5, info, null); + return appendEntry(link, md5, info, new DriverBackedFileReader(null, ".")); } public abstract LinkFile appendEntry(String link, String md5, String info, FileReader reader); @@ -262,29 +265,37 @@ public boolean rollbackToTimestamp(long timestamp) { return removed; } - public void save() { - save(-1); + public void save(FileReader reader) { + save(-1, reader); } - public void save(long timestamp) { + public void save(long timestamp, FileReader reader) { try (OutputStream os = source.getOutputStream()) { - save(os, timestamp); + save(os, timestamp, reader); } catch (IOException e) { throw new GorResourceException("Could not save: " + source.getFullPath(), source.getFullPath(), e); } } - private void save(OutputStream os, long timestamp) { + + private void save(OutputStream os, long timestamp, FileReader reader) { meta.setProperty(LinkFileMeta.HEADER_SERIAL_KEY, Integer.toString(Integer.parseInt(meta.getProperty(LinkFileMeta.HEADER_SERIAL_KEY, "0")) + 1)); - var content = new StringBuilder(getHeader()); + var currentTimestamp = timestamp > 0 ? timestamp : System.currentTimeMillis(); + var header = getHeader(); + var content = new StringBuilder(header); + + for (var i = 0; i < entries.size(); i++) { + var entry = entries.get(entries.size() - 1 - i); - if (!entries.isEmpty()) { - var currentTimestamp = timestamp > 0 ? 
timestamp : System.currentTimeMillis(); - entries.stream() - .skip(Math.max(0, entries.size() - getEntriesCountMax())) - .filter(entry -> entry.timestamp() <= 0 || currentTimestamp - entry.timestamp() <= getEntriesAgeMax()) - .forEach(entry -> content.append(entry.format()).append("\n")); + if ((i >= getEntriesCountMax()) + || (entry.timestamp() > 0 && currentTimestamp - entry.timestamp() > getEntriesAgeMax()) + || (content.length() > LINK_FILE_MAX_SIZE)) { + checkAndGCEntries(0, entries.size() - 1 - i, reader); + break; + } + + content.insert(header.length(), entry.format() + "\n"); } try { @@ -296,6 +307,53 @@ private void save(OutputStream os, long timestamp) { protected abstract List parseEntries(String content); + // Check if we can garbage collect entries between fromIndex and toIndex (inclusive). + + /** + * Check if we can garbage collect entries between fromIndex and toIndex (inclusive), if so do it. + * @param fromIndex fromIndex (inclusive) + * @param toIndex toIndex (inclusive) + */ + protected void checkAndGCEntries(int fromIndex, int toIndex, FileReader reader) { + if (meta.getPropertyBool(HEADER_DATA_LIFECYCLE_MANAGED_KEY, false)) { + List dataUrlsToDelete = new ArrayList<>(); + // Have managed link file. + for (int i = fromIndex; i <= toIndex; i++) { + var entry = entries.get(i); + if (!matchEntryUrls(entry, toIndex + 1, entries.size() - 1)) { + // This entry url is not used by newer entries, can be deleted. 
+ dataUrlsToDelete.add(getUrlFromEntry(entry)); + } + } + + new Thread(() -> gcEntries(dataUrlsToDelete, reader)).start(); + + } + } + + private boolean matchEntryUrls(LinkFileEntry entry, int fromIndex, int toIndex) { + for (int i = fromIndex; i <= toIndex; i++) { + if (entries.get(i).url().equals(entry.url())) { + return true; + } + } + return false; + } + + private void gcEntries(List dataUrlsToDelete, FileReader reader) { + var sourceLinkFielUrl = source.getFullPath(); + for (String linkUrl : dataUrlsToDelete) { + var linkSource = reader.resolveUrl(linkUrl); + if (linkSource != null && linkSource.exists()) { + log.info("Garbage collecting link file {}, entry data for {}", sourceLinkFielUrl, linkUrl); + try { + linkSource.delete(); + } catch (Exception e) { + log.warn("Failed to garbage collect link file {} entry data for {}", sourceLinkFielUrl, linkUrl, e); + } + } + } + } /** * Load content from the source if it exists. @@ -330,8 +388,8 @@ public static String loadContentFromSource(StreamSource source) throws IOExcepti private static String readLimitedLinkContent(StreamSource source) { try (InputStream is = source.open()) { - var content = StreamUtils.readString(is, LINK_FILE_MAX_SIZE); - if (content.length() == LINK_FILE_MAX_SIZE) { + var content = StreamUtils.readString(is, 2 * LINK_FILE_MAX_SIZE); + if (content.length() > 2 * LINK_FILE_MAX_SIZE) { throw new GorResourceException(String.format("Link file '%s' too large (> %d bytes).", source.getFullPath(), LINK_FILE_MAX_SIZE), source.getFullPath()); } diff --git a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileMeta.java b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileMeta.java index d79a5a01..227a13f8 100644 --- a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileMeta.java +++ b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileMeta.java @@ -16,6 +16,8 @@ public class LinkFileMeta extends BaseMeta { public static final String HEADER_DATA_LOCATION_KEY = 
"DATA_LOCATION"; // Should the content lifecycle be managed (data deleted if the link is removed from the link file) (true or false). public static final String HEADER_DATA_LIFECYCLE_MANAGED_KEY = "DATA_LIFECYCLE_MANAGED"; + // Override reuse strategy for versioned links (REUSE, REUSE_DATA, NO_REUSE). + public static final String HEADER_REUSE_STRATEGY_KEY = "REUSE_STRATEGY"; static final String DEFAULT_VERSION = System.getProperty("gor.driver.link.default.version", LinkFileV0.VERSION); diff --git a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileUtil.java b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileUtil.java index 0597ec07..56ee429e 100644 --- a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileUtil.java +++ b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileUtil.java @@ -95,6 +95,15 @@ public static String extractLinkMetaOptionData(String options) { return ""; } + /** + * Data record for link data. + * @param linkFile filename of the link file + * @param linkFileContent file the link points to + * @param linkFileMeta meta data / header + * @param linkFileInfo entry info + * @param md5 entry md5 + * @param version Link file version (V0, V1, etc) + */ public record LinkData(String linkFile, String linkFileContent, String linkFileMeta, String linkFileInfo, String md5, String version) {} @@ -134,7 +143,7 @@ public static LinkData extractLinkMetaInfo(String optLinkFileMeta) { for (String s : CommandParseUtilities.quoteSafeSplit(StringUtils.strip(optLinkFileMeta, "\"\'"), ',')) { var l = s.trim(); if (l.startsWith(LinkFileEntryV1.ENTRY_INFO_KEY)) { - linkFileInfo = StringUtils.strip(l.substring(LinkFileEntryV1.ENTRY_INFO_KEY.length() + 1), "\"\'"); + linkFileInfo = StringUtils.strip(l.substring(LinkFileEntryV1.ENTRY_INFO_KEY.length() + 1), "\"\'"); } else { linkFileMeta += "## " + l + "\n"; } @@ -154,6 +163,6 @@ public static void writeLinkFile(FileReader fileReader, LinkData linkData) throw 
LinkFile.createOrLoad((StreamSource)unsecureFileReader.resolveUrl(linkData.linkFile, true), linkData.version) .appendMeta(linkData.linkFileMeta) .appendEntry(linkData.linkFileContent, linkData.md5, linkData.linkFileInfo, unsecureFileReader) - .save(unsecureFileReader.getQueryTime()); + .save(unsecureFileReader.getQueryTime(), unsecureFileReader); } } diff --git a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileV1.java b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileV1.java index f6d70c1c..4ce999e1 100644 --- a/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileV1.java +++ b/model/src/main/java/org/gorpipe/gor/driver/linkfile/LinkFileV1.java @@ -1,10 +1,8 @@ package org.gorpipe.gor.driver.linkfile; import org.gorpipe.gor.driver.providers.stream.sources.StreamSource; -import org.gorpipe.gor.model.BaseMeta; import org.gorpipe.gor.model.FileReader; -import java.io.IOException; import java.util.List; /** @@ -12,9 +10,20 @@ */ public class LinkFileV1 extends LinkFile { + private static final org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger(LinkFileV1.class); + public static final String VERSION = "1"; public static final String DEFAULT_TABLE_HEADER = "#File\tTimestamp\tMD5\tSerial\tInfo"; + enum LinkReuseStrategy { + REUSE, // Reuse previous entry. + REUSE_DATA, // Reuse the data, create new entry. + NO_REUSE // No reuse, create new entry and data. + } + + public static LinkReuseStrategy defaultReuseStrategy + = LinkReuseStrategy.valueOf(System.getProperty("gor.link.versioned.reuse.strategy.default", "NO_REUSE")); + public static boolean allowOverwriteOfTargets = Boolean.parseBoolean(System.getProperty("gor.link.versioned.allow.overwrite", "true")); @@ -33,7 +42,10 @@ public LinkFile appendEntry(String link, String md5, String info, FileReader rea var latestEntry = getLatestEntry(); var entry = new LinkFileEntryV1(link, System.currentTimeMillis(), md5, latestEntry != null ?
latestEntry.serial() + 1 : 1, info); validateEntry(entry, reader); - entries.add(entry); + entry = handleRepeatedEntries(entry, reader); + if (entry != null) { + entries.add(entry); + } return this; } @@ -49,6 +61,7 @@ private void validateEntry(LinkFileEntry entry, FileReader reader) { throw new IllegalArgumentException("Entry URL cannot be null or empty"); } if (!allowOverwriteOfTargets) { + // Only applies to non managed data. for (LinkFileEntry existingEntry : entries) { if (existingEntry.url().equals(entry.url()) && !canReuseEntryWithSameUrl(existingEntry, entry, reader)) { throw new IllegalArgumentException("Duplicate entry URL: " + entry.url()); @@ -58,8 +71,10 @@ private void validateEntry(LinkFileEntry entry, FileReader reader) { } private boolean canReuseEntryWithSameUrl(LinkFileEntry oldEntry, LinkFileEntry newEntry, FileReader reader) { - // We can reuse an entry if it is they has the same underlying file, as if not the integrity of the - // versioned link file is violated. + // We can reuse an entry (same url) if the entries have the same underlying file, as if not the integrity of the + // versioned link file is violated (as the new entry file overwrites the old entry file, but the old entry + // is still in the link file history). + // BUT haven't we already ruined the integrity when we enter here!? if ((oldEntry.md5() != null && newEntry.md5() != null)) { // Use md5 if available. @@ -71,6 +86,69 @@ private boolean canReuseEntryWithSameUrl(LinkFileEntry oldEntry, LinkFileEntry n } } + private LinkFileEntryV1 handleRepeatedEntries(LinkFileEntryV1 newEntry, FileReader reader) { + var reuseStrategy = LinkReuseStrategy.valueOf(getMeta().getProperty(LinkFileMeta.HEADER_REUSE_STRATEGY_KEY, defaultReuseStrategy.name())); + + if (reuseStrategy == LinkFileV1.LinkReuseStrategy.NO_REUSE) { + return newEntry; // No reuse, always create new entry and data. 
+ } + + var existingEntry = findExistingEntryByMD5(newEntry); + if (existingEntry == null) { + return newEntry; // No existing entry with same MD5, create new entry and data. + } + + // The new entry's file will not be used (an existing entry has the same MD5), so clean it up now if the data is managed. + cleanEntryDataIfManaged(newEntry, reader); + + var isExistingEntryLatestEntry = existingEntry.equals(getLatestEntry()); + if (reuseStrategy == LinkReuseStrategy.REUSE && isExistingEntryLatestEntry) { + // Existing matching entry is the latest entry, do nothing. + return null; + } else if (reuseStrategy == LinkReuseStrategy.REUSE_DATA || reuseStrategy == LinkReuseStrategy.REUSE) { + // Reuse the data, create new entry with same URL but new timestamp, serial and info. + return new LinkFileEntryV1(existingEntry.url(), newEntry.timestamp(), existingEntry.md5(), newEntry.serial(), newEntry.info()); + } else { + throw new IllegalArgumentException("Unsupported reuse strategy: " + getMeta().getProperty(LinkFileMeta.HEADER_REUSE_STRATEGY_KEY)); + } + } + + private LinkFileEntry findExistingEntryByMD5(LinkFileEntry entry) { + for (LinkFileEntry existingEntry : entries) { + if (existingEntry.md5().equals(entry.md5())) { + return existingEntry; + } + } + return null; + } + + private LinkFileEntry findExistingEntryByUrl(LinkFileEntry entry) { + for (LinkFileEntry existingEntry : entries) { + if (existingEntry.url().equals(entry.url())) { + return existingEntry; + } + } + return null; + } + + /** + * Delete the candidate entry's data if the link file data is lifecycle managed and no existing entry uses the same URL. + * @param candiateEntry entry with the same MD5 as an existing entry, and thus candidate for reuse of the underlying data. + * @param reader file reader used to delete the candidate entry's data. + */ + private void cleanEntryDataIfManaged(LinkFileEntry candiateEntry, FileReader reader) { + if (getMeta().getPropertyBool(LinkFileMeta.HEADER_DATA_LIFECYCLE_MANAGED_KEY, false)) { + if (findExistingEntryByUrl(candiateEntry) == null) { + // The file is not used by any existing entry, and the data is managed, we can safely delete it.
+ try { + reader.delete(candiateEntry.url()); + } catch (Exception e) { + log.warn("Failed to delete data for existing entry with same MD5: " + candiateEntry.url(), e); + } + } + } + } + private void checkDefaultMeta() { if (!meta.getVersion().equals(VERSION)) { meta.loadAndMergeMeta(getDefaultMetaContent()); diff --git a/model/src/main/java/org/gorpipe/gor/model/BaseMeta.java b/model/src/main/java/org/gorpipe/gor/model/BaseMeta.java index 3c28eff2..8df0d237 100644 --- a/model/src/main/java/org/gorpipe/gor/model/BaseMeta.java +++ b/model/src/main/java/org/gorpipe/gor/model/BaseMeta.java @@ -94,6 +94,17 @@ public int getPropertyInt(String key, int defValue) { return headerProps.containsKey(key) ? Integer.parseInt(headerProps.get(key)) : defValue; } + /** + * Get header property. + * + * @param key name of the property. + * @param defValue default value + * @return the header property identified with [key] + */ + public boolean getPropertyBool(String key, boolean defValue) { + return headerProps.containsKey(key) ? Boolean.parseBoolean(headerProps.get(key)) : defValue; + } + /** * Set header property. 
* diff --git a/model/src/main/java/org/gorpipe/gor/model/DriverBackedFileReader.java b/model/src/main/java/org/gorpipe/gor/model/DriverBackedFileReader.java index 1c51c415..cb8c0d42 100644 --- a/model/src/main/java/org/gorpipe/gor/model/DriverBackedFileReader.java +++ b/model/src/main/java/org/gorpipe/gor/model/DriverBackedFileReader.java @@ -51,7 +51,6 @@ import org.slf4j.LoggerFactory; import java.io.*; -import java.net.URI; import java.nio.file.*; import java.nio.file.attribute.FileAttribute; import java.util.*; @@ -70,7 +69,7 @@ public class DriverBackedFileReader extends FileReader { final static int GZIP_BUFFER_SIZE = Integer.parseInt(System.getProperty("gor.gzip.buffer.size", "2046")); - private final boolean DEPENDENTS = System.getProperty("gor.filereader.dependents", "true").equalsIgnoreCase("true"); + private final boolean DEPENDENTS = Boolean.parseBoolean(System.getProperty("gor.filereader.dependents", "true")); private final String securityContext; protected final String commonRoot; diff --git a/model/src/main/java/org/gorpipe/gor/model/FileReader.java b/model/src/main/java/org/gorpipe/gor/model/FileReader.java index 87a8214d..6ce76b7c 100644 --- a/model/src/main/java/org/gorpipe/gor/model/FileReader.java +++ b/model/src/main/java/org/gorpipe/gor/model/FileReader.java @@ -343,7 +343,7 @@ public void writeLinkIfNeeded(String url) throws IOException { DataSource dataSource = resolveUrl(url, true); if (dataSource.forceLink()) { DataSource linkDataSource = resolveUrl(dataSource.getProjectLinkFile(), true); - LinkFile.create((StreamSource) linkDataSource, dataSource.getProjectLinkFileContent()).save(getQueryTime()); + LinkFile.create((StreamSource) linkDataSource, dataSource.getProjectLinkFileContent()).save(getQueryTime(), this); } } diff --git a/model/src/main/java/org/gorpipe/gor/table/livecycle/TableTwoPhaseCommitSupport.java b/model/src/main/java/org/gorpipe/gor/table/livecycle/TableTwoPhaseCommitSupport.java index 23d27fc0..20b8ebc2 100644 --- 
a/model/src/main/java/org/gorpipe/gor/table/livecycle/TableTwoPhaseCommitSupport.java +++ b/model/src/main/java/org/gorpipe/gor/table/livecycle/TableTwoPhaseCommitSupport.java @@ -57,7 +57,7 @@ public void commit() { LinkFile.load((StreamSource) table.fileReader.resolveDataSource(new SourceReference(table.getLinkPath()))) .appendEntry(table.getPath(), "") - .save(); + .save(table.fileReader); } else { if (!table.isUseEmbeddedHeader()) { diff --git a/model/src/main/java/org/gorpipe/gor/util/StringUtil.java b/model/src/main/java/org/gorpipe/gor/util/StringUtil.java index 5a10e144..e89f35d5 100644 --- a/model/src/main/java/org/gorpipe/gor/util/StringUtil.java +++ b/model/src/main/java/org/gorpipe/gor/util/StringUtil.java @@ -270,4 +270,13 @@ public static String cleanTextContent(String text) return text.trim(); } + + public static String trimQuotes(String s) { + if (s == null) return null; + s = s.trim(); + if (s.length() > 1 && ((s.startsWith("\"") && s.endsWith("\"")) || (s.startsWith("'") && s.endsWith("'")))) { + return s.substring(1, s.length() - 1); + } + return s.trim(); + } } diff --git a/model/src/test/java/org/gorpipe/gor/driver/linkfile/LinkFileTest.java b/model/src/test/java/org/gorpipe/gor/driver/linkfile/LinkFileTest.java index a8f447d4..25fac6b5 100644 --- a/model/src/test/java/org/gorpipe/gor/driver/linkfile/LinkFileTest.java +++ b/model/src/test/java/org/gorpipe/gor/driver/linkfile/LinkFileTest.java @@ -4,16 +4,19 @@ import org.gorpipe.gor.driver.meta.SourceReference; import org.gorpipe.gor.driver.providers.stream.sources.StreamSource; import org.gorpipe.gor.driver.providers.stream.sources.file.FileSource; +import org.gorpipe.gor.model.DriverBackedFileReader; +import org.gorpipe.gor.model.FileReader; import org.junit.*; import org.junit.contrib.java.lang.system.EnvironmentVariables; import org.junit.contrib.java.lang.system.RestoreSystemProperties; import org.junit.rules.TemporaryFolder; -import java.io.ByteArrayInputStream; import 
java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import static org.gorpipe.gor.driver.linkfile.LinkFileV1.LinkReuseStrategy.NO_REUSE; +import static org.gorpipe.gor.driver.linkfile.LinkFileV1.LinkReuseStrategy.REUSE; import static org.junit.Assert.*; import static org.mockito.Mockito.*; @@ -29,9 +32,12 @@ public class LinkFileTest { public final EnvironmentVariables environmentVariables = new EnvironmentVariables(); + private FileReader fileReader; + private StreamSource mockSource; + private StreamSource source; private final String v1LinkFileContent = """ - ## SERIAL = 1 + ## SERIAL = 2 ## VERSION = 1 #FILE\tTIMESTAMP\tMD5\tSERIAL\tINFO source/v1/ver1.gorz\t2024-12-15T11:21:30.790Z\tABCDEAF13422\t1\t @@ -44,15 +50,18 @@ public class LinkFileTest { protected Path workPath; @Before - public void setUp() { + public void setUp() throws IOException { workPath = workDir.getRoot().toPath().toAbsolutePath(); - mockSource = mock(StreamSource.class); + source = new FileSource(workPath.resolve("test.link").toString()); + + fileReader = new DriverBackedFileReader(null, workPath.toString()); + mockSource = mock(StreamSource.class); } @Test public void testCreateLinkFile() { - LinkFile linkFile = LinkFile.create(mockSource, v1LinkFileContent); + LinkFile linkFile = LinkFile.create(source, v1LinkFileContent); assertNotNull(linkFile); assertTrue(linkFile instanceof LinkFileV1); assertEquals("1", linkFile.getMeta().getVersion()); @@ -62,7 +71,7 @@ public void testCreateLinkFile() { @Test public void testCreateLinkFileSimple() throws IOException { - LinkFile linkFile = LinkFile.createV1(mockSource, "test.gorz"); + LinkFile linkFile = LinkFile.createV1(source, "test.gorz"); assertNotNull(linkFile); assertTrue(linkFile instanceof LinkFileV1); assertEquals("1", linkFile.getMeta().getVersion()); @@ -74,7 +83,7 @@ public void testCreateLinkFileSimple() throws IOException { @Test public void testCreateLinkFileSimpleWithDefault0() { 
System.setProperty("gor.driver.link.default.version", "0"); - LinkFile linkFile = LinkFile.create(mockSource, "test.gorz"); + LinkFile linkFile = LinkFile.create(source, "test.gorz"); assertNotNull(linkFile); assertTrue(linkFile instanceof LinkFileV0); assertEquals("0", linkFile.getMeta().getVersion()); @@ -84,9 +93,8 @@ public void testCreateLinkFileSimpleWithDefault0() { @Test public void testLoadLinkFile() throws IOException { - when(mockSource.exists()).thenReturn(true); - when(mockSource.open()).thenReturn(new ByteArrayInputStream(v1LinkFileContent.getBytes())); - LinkFile linkFile = LinkFile.load(mockSource); + Files.writeString(Path.of(source.getFullPath()), v1LinkFileContent); + LinkFile linkFile = LinkFile.load(source); assertNotNull(linkFile); assertEquals(2, linkFile.getEntries().size()); assertEquals(100, linkFile.getEntriesCountMax()); @@ -94,7 +102,7 @@ public void testLoadLinkFile() throws IOException { @Test public void testAppendEntry() { - LinkFile linkFile = LinkFile.create(mockSource, v1LinkFileContent); + LinkFile linkFile = LinkFile.create(source, v1LinkFileContent); linkFile.appendEntry(simpleFile, "NEWMD5SUM", "Test1"); assertEquals(3, linkFile.getEntries().size()); } @@ -126,7 +134,7 @@ public void testSaveNewV1LinkFile() throws IOException { var linkPath = workPath.resolve("test.link"); LinkFile linkFile = LinkFile.createV1(new FileSource(linkPath.toString()), ""); linkFile.appendEntry(simpleFile, "NEWMD5SUM"); - linkFile.save(); + linkFile.save(fileReader); String savedContent = Files.readString(linkPath); assertTrue(savedContent.contains("## VERSION = 1")); assertTrue(savedContent.contains(simpleFile)); @@ -137,7 +145,7 @@ public void testSaveNewV0LinkFile() throws IOException { var linkPath = workPath.resolve("test.link"); LinkFile linkFile = LinkFile.createV0(new FileSource(linkPath.toString()), ""); linkFile.appendEntry(simpleFile, "NEWMD5SUM"); - linkFile.save(); + linkFile.save(fileReader); String savedContent = 
Files.readString(linkPath); assertEquals(simpleFile, savedContent.trim()); } @@ -148,9 +156,9 @@ public void testSaveLinkFileV1ToV1() throws IOException { Files.writeString(linkPath, v1LinkFileContent); LinkFile linkFile = LinkFile.load(new FileSource(linkPath.toString())); linkFile.appendEntry(simpleFile, "NEWMD5SUM"); - linkFile.save(); + linkFile.save(fileReader); String savedContent = Files.readString(linkPath); - assertTrue(savedContent.startsWith("## SERIAL = 2")); + assertTrue(savedContent.startsWith("## SERIAL = 3")); assertTrue(savedContent.contains(simpleFile)); } @@ -160,7 +168,7 @@ public void testSaveLinkFileV0ToV0() throws IOException { Files.writeString(linkPath, "a/b/c.gorz"); LinkFile linkFile = LinkFile.load(new FileSource(linkPath.toString())); linkFile.appendEntry(simpleFile, "NEWMD5SUM"); - linkFile.save(); + linkFile.save(fileReader); String savedContent = Files.readString(linkPath); assertEquals(simpleFile, savedContent.trim()); } @@ -171,7 +179,7 @@ public void testSaveLinkFileV0ToV1() throws IOException { Files.writeString(linkPath, "a/b/c.gorz"); LinkFile linkFile = LinkFile.loadV1(new FileSource(linkPath.toString())); linkFile.appendEntry(simpleFile, "NEWMD5SUM"); - linkFile.save(); + linkFile.save(fileReader); String savedContent = Files.readString(linkPath); assertTrue(savedContent.contains("## VERSION = 1")); assertEquals(2, linkFile.getEntries().size()); @@ -184,7 +192,7 @@ public void testSaveLinkFileV1ToV0() throws IOException { Files.writeString(linkPath, v1LinkFileContent); LinkFile linkFile = LinkFile.loadV0(new FileSource(linkPath.toString())); linkFile.appendEntry(simpleFile, "NEWMD5SUM"); - linkFile.save(); + linkFile.save(fileReader); String savedContent = Files.readString(linkPath); assertEquals(simpleFile, savedContent.trim()); } @@ -260,4 +268,206 @@ public void testInferDataFileNameFromLinkFile_AbsolutePath() throws Exception { assertNotNull(result); assertTrue(result.matches((root + "/x/x_.*\\.gor").replace("/", 
"\\/"))); } + + @Test + public void testLimitByNumberOfEntries() throws IOException { + LinkFile linkFile = LinkFile.create(source, v1LinkFileContent); + linkFile.setEntriesCountMax(2); + + // Current entries: 2. Add one more. + linkFile.appendEntry("new_entry.gor", "md5"); + linkFile.save(fileReader); + + LinkFile saved = LinkFile.loadV1(source); + assertEquals(2, saved.getEntriesCount()); + // Verify the latest entry is the one we appended + assertEquals(workPath.resolve("new_entry.gor").toString(), saved.getLatestEntryUrl()); + // Verify the oldest entry was dropped. + assertEquals("source/v1/ver2.gorz", saved.getEntries().get(0).url()); + } + + @Test + public void testLimitByTimestamp() throws IOException { + // v1LinkFileContent contains timestamps: + // ver1: 1734261690790 + // ver2: 1734305124533 + + LinkFile linkFile = LinkFile.create(source, v1LinkFileContent); + linkFile.setEntriesAgeMax(10000); + + // Save with current time far in the future + long futureTime = 1734305124533L + 20000; + linkFile.save(futureTime, fileReader); + + LinkFile saved = LinkFile.loadV1(source); + + // Both original entries are older than 10s from futureTime. + // Should be empty or 0 entries written (if header preserved) + assertEquals(0, saved.getEntriesCount()); + } + + @Test + public void testLimitBySize() throws IOException { + LinkFile linkFile = LinkFile.create(source, v1LinkFileContent); + linkFile.getMeta().setProperty(LinkFileMeta.HEADER_REUSE_STRATEGY_KEY, NO_REUSE.name()); + + // Generate a large info string to exceed default 10000 bytes. + // 4000 chars * 3 entries should exceed it. 
+ String largeInfo = java.util.stream.IntStream.range(0, 4000).mapToObj(i -> "x").reduce("", String::concat); + + linkFile.appendEntry("large1.gor", "md5", largeInfo); + linkFile.appendEntry("large2.gor", "md5", largeInfo); + linkFile.appendEntry("large3.gor", "md5", largeInfo); + linkFile.save(fileReader); + + LinkFile saved = LinkFile.loadV1(source); + + // Should have fewer than 3+2=5 entries. + // Likely latest 2 entries fit (~8kb + some), 3rd pushes over. + assertTrue(saved.getEntriesCount() < 5); + // Ensure latest is present + assertEquals(workPath.resolve("large3.gor").toString(), saved.getLatestEntryUrl()); + } + + @Test + public void testGarbageCollectionManaged() throws IOException { + LinkFile linkFile = LinkFile.create(source, v1LinkFileContent); + linkFile.setEntriesCountMax(1); // Force eviction of oldest + linkFile.getMeta().setProperty("DATA_LIFECYCLE_MANAGED", "true"); + + FileReader mockReader = mock(FileReader.class); + StreamSource mockDeletedSource = mock(StreamSource.class); + + // Original oldest is source/v1/ver1.gorz + when(mockReader.resolveUrl(anyString())).thenReturn(mockDeletedSource); + when(mockDeletedSource.exists()).thenReturn(true); + + linkFile.save(mockReader); + + // Verify async delete called. Use timeout because it's in a separate thread. 
+ verify(mockDeletedSource, timeout(1000).atLeastOnce()).delete(); + // Verify it tried to delete ver1 (the one evicted) + verify(mockReader, timeout(1000)).resolveUrl(contains("ver1.gorz")); + } + + @Test + public void testNoGarbageCollectionUnmanaged() throws IOException { + LinkFile linkFile = LinkFile.create(source, v1LinkFileContent); + linkFile.setEntriesCountMax(1); // Force eviction + linkFile.getMeta().setProperty("DATA_LIFECYCLE_MANAGED", "false"); + + FileReader mockReader = mock(FileReader.class); + StreamSource mockDeletedSource = mock(StreamSource.class); + + when(mockReader.resolveUrl(anyString())).thenReturn(mockDeletedSource); + + linkFile.save(mockReader); + + // Verify delete NOT called. + verify(mockDeletedSource, after(500).never()).delete(); + } + + @Test + public void testReuseStrategyReuseSkipsDuplicateLatestEntryUnManaged() throws IOException { + var setupRes = setupReuseStrategyTest(REUSE, false); + + assertEquals(setupRes.initialCount, setupRes.linkFile.getEntriesCount()); + assertEquals(setupRes.latestBefore, setupRes.linkFile.getLatestEntry()); + assertTrue(Files.exists(setupRes.newFile)); // Verify the new file is not used and not deleted. + } + + @Test + public void testReuseStrategyReuseDataCreatesNewEntryWithExistingUrlUnManaged() throws IOException { + var setupRes = setupReuseStrategyTest(LinkFileV1.LinkReuseStrategy.REUSE_DATA, false); + + assertEquals(setupRes.initialCount + 1, setupRes.linkFile.getEntriesCount()); + var latestAfter = setupRes.linkFile.getLatestEntry(); + assertEquals("source/v1/ver2.gorz", latestAfter.url()); + assertEquals("new entry info", latestAfter.info()); + assertEquals(setupRes.latestBefore.serial() + 1, latestAfter.serial()); + assertTrue(Files.exists(setupRes.newFile)); // Verify the new file is not used and not deleted. 
+ } + + @Test + public void testReuseStrategyNoReuseKeepsNewUrlUnManaged() throws IOException { + var setupRes = setupReuseStrategyTest(NO_REUSE, false); + + assertEquals(setupRes.initialCount + 1, setupRes.linkFile.getEntriesCount()); + var latestAfter = setupRes.linkFile.getLatestEntry(); + assertEquals(setupRes.newFile.toString(), latestAfter.url()); + assertEquals("new entry info", latestAfter.info()); + assertTrue(Files.exists(setupRes.newFile)); // Verify the new file is used and not deleted. + } + + @Test + public void testReuseStrategyReuseSkipsDuplicateLatestEntryManaged() throws IOException { + var setupRes = setupReuseStrategyTest(REUSE, true); + + assertEquals(setupRes.initialCount, setupRes.linkFile.getEntriesCount()); + assertEquals(setupRes.latestBefore, setupRes.linkFile.getLatestEntry()); + assertFalse(Files.exists(setupRes.newFile)); // Verify the new file is not used and is deleted. + } + + @Test + public void testReuseStrategyReuseDataCreatesNewEntryWithExistingUrlManaged() throws IOException { + var setupRes = setupReuseStrategyTest(LinkFileV1.LinkReuseStrategy.REUSE_DATA, true); + + assertEquals(setupRes.initialCount + 1, setupRes.linkFile.getEntriesCount()); + var latestAfter = setupRes.linkFile.getLatestEntry(); + assertEquals("source/v1/ver2.gorz", latestAfter.url()); + assertEquals("new entry info", latestAfter.info()); + assertEquals(setupRes.latestBefore.serial() + 1, latestAfter.serial()); + assertFalse(Files.exists(setupRes.newFile)); // Verify the new file is not used and is deleted. 
+ } + + @Test + public void testReuseStrategyNoReuseKeepsNewUrlManaged() throws IOException { + var setupRes = setupReuseStrategyTest(NO_REUSE, true); + + assertEquals(setupRes.initialCount + 1, setupRes.linkFile.getEntriesCount()); + var latestAfter = setupRes.linkFile.getLatestEntry(); + assertEquals(setupRes.newFile.toString(), latestAfter.url()); + assertEquals("new entry info", latestAfter.info()); + assertTrue(Files.exists(setupRes.newFile)); // Verify the new file is used and not deleted. + } + + @Test + public void testReuseStrategyReuseKeepsNewUrlUnManagedDontDeleteExisting() throws IOException { + LinkFile linkFile = LinkFile.create(source, v1LinkFileContent); + linkFile.getMeta().setProperty(LinkFileMeta.HEADER_REUSE_STRATEGY_KEY, REUSE.name()); + linkFile.getMeta().setProperty(LinkFileMeta.HEADER_DATA_LIFECYCLE_MANAGED_KEY, String.valueOf(true)); + + int initialCount = linkFile.getEntriesCount(); + var latestBefore = linkFile.getLatestEntry(); + + Path newFile = workPath.resolve("ver3.gor"); + Files.writeString(newFile, "#Chrom\tPos\nchr1\t100\n"); + // Add the same entry twice, make sure we don't delete the file. + linkFile.appendEntry(newFile.toString(), "123", "new entry info", fileReader); + linkFile.appendEntry(newFile.toString(), "123", "new entry info", fileReader); + + assertEquals(initialCount + 1, linkFile.getEntriesCount()); + assertNotEquals(latestBefore, linkFile.getLatestEntry()); + assertEquals("new entry info", linkFile.getLatestEntry().info()); + assertTrue(Files.exists(newFile)); // Verify the new file is used and not deleted. 
+ } + + record ReuseStrategySetupResult(LinkFile linkFile, int initialCount, LinkFileEntry latestBefore, Path newFile) {} + private ReuseStrategySetupResult setupReuseStrategyTest(LinkFileV1.LinkReuseStrategy reuseStrategy, boolean lifecycleManaged) throws IOException { + LinkFile linkFile = LinkFile.create(source, v1LinkFileContent); + linkFile.getMeta().setProperty(LinkFileMeta.HEADER_REUSE_STRATEGY_KEY, reuseStrategy.name()); + linkFile.getMeta().setProperty(LinkFileMeta.HEADER_DATA_LIFECYCLE_MANAGED_KEY, String.valueOf(lifecycleManaged)); + + int initialCount = linkFile.getEntriesCount(); + var latestBefore = linkFile.getLatestEntry(); + + Path newFile = workPath.resolve("ver3.gor"); + Files.writeString(newFile, "#Chrom\tPos\nchr1\t100\n"); + + linkFile.appendEntry(newFile.toString(), "334DEAF13422", "new entry info", fileReader); + + return new ReuseStrategySetupResult(linkFile, initialCount, latestBefore, newFile); + } + + }