diff --git a/src/SIL.Machine/Corpora/UsfmVersificationErrorDetector.cs b/src/SIL.Machine/Corpora/UsfmVersificationErrorDetector.cs index e7c1a0a2..1cd72f48 100644 --- a/src/SIL.Machine/Corpora/UsfmVersificationErrorDetector.cs +++ b/src/SIL.Machine/Corpora/UsfmVersificationErrorDetector.cs @@ -12,7 +12,9 @@ public enum UsfmVersificationErrorType ExtraVerse, InvalidVerseRange, MissingVerseSegment, - ExtraVerseSegment + ExtraVerseSegment, + InvalidChapterNumber, + InvalidVerseNumber } public class UsfmVersificationError @@ -22,6 +24,7 @@ public class UsfmVersificationError private readonly int _expectedVerse; private readonly int _actualChapter; private readonly int _actualVerse; + private readonly string _actualValue; private VerseRef? _verseRef = null; public UsfmVersificationError( @@ -43,6 +46,21 @@ public UsfmVersificationError( ProjectName = projectName; } + public UsfmVersificationError( + int bookNum, + int expectedChapter, + string actualValue, + string projectName, + UsfmVersificationErrorType type + ) + { + _bookNum = bookNum; + _expectedChapter = expectedChapter; + _actualValue = actualValue; + ProjectName = projectName; + Type = type; + } + public string ProjectName { get; private set; } public UsfmVersificationErrorType Type { get; private set; } @@ -104,8 +122,14 @@ public string ExpectedVerseRef { get { - if (Type == UsfmVersificationErrorType.ExtraVerse) + if ( + Type == UsfmVersificationErrorType.ExtraVerse + || Type == UsfmVersificationErrorType.InvalidChapterNumber + || Type == UsfmVersificationErrorType.InvalidVerseNumber + ) + { return ""; + } // We do not want to throw an exception here, and the VerseRef constructor can throw // an exception with certain invalid verse data; use TryParse instead. @@ -154,11 +178,20 @@ out VerseRef correctedVerseRangeRef return defaultVerseRef.ToString(); } } + public string ActualVerseRef { get { - if (_verseRef != null) + if (Type == UsfmVersificationErrorType.InvalidChapterNumber) + { + return $"{Canon.BookNumberToId(_bookNum)} {_actualValue}"; + } + else if (Type == UsfmVersificationErrorType.InvalidVerseNumber) + { + return $"{Canon.BookNumberToId(_bookNum)} {_expectedChapter}:{_actualValue}"; + } + else if (_verseRef != null) { return _verseRef.ToString(); } @@ -254,6 +287,22 @@ string pubNumber _currentChapter = state.VerseRef.ChapterNum; _currentVerse = new VerseRef(); + + // See whether the chapter number is invalid + VerseRef verseRef = state.VerseRef.Clone(); + verseRef.Chapter = number; + if (verseRef.ChapterNum == -1) + { + _errors.Add( + new UsfmVersificationError( + _currentBook, + _currentChapter, + number, + _projectName, + UsfmVersificationErrorType.InvalidChapterNumber + ) + ); + } } public override void Verse( @@ -264,6 +313,7 @@ public override void Verse( string pubNumber ) { + bool verseInError = false; _currentVerse = state.VerseRef; if (_currentBook > 0 && Canon.IsCanonical(_currentBook) && _currentChapter > 0) { @@ -277,7 +327,29 @@ string pubNumber _currentVerse ); if (versificationError.CheckError()) + { _errors.Add(versificationError); + verseInError = true; + } + } + + if (!verseInError) + { + // See whether the verse number is invalid + VerseRef verseRef = _currentVerse.Clone(); + verseRef.Verse = number; + if (verseRef.VerseNum == -1) + { + _errors.Add( + new UsfmVersificationError( + _currentBook, + _currentChapter, + number, + _projectName, + UsfmVersificationErrorType.InvalidVerseNumber + ) + ); + } } } } diff --git a/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationErrorTests.cs b/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationErrorTests.cs index d4582db8..f3c61806 100644 --- a/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationErrorTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationErrorTests.cs @@ -397,6 +397,55 @@ public void GetUsfmVersificationErrors_MultipleChapters() Assert.That(errors[1].ActualVerseRef, Is.EqualTo("2JN 2:1")); } + [Test] + public void GetUsfmVersificationErrors_InvalidChapterNumber() + { + var env = new TestEnvironment( + files: new Dictionary() + { + { + "653JNTest.SFM", + @"\id 3JN + \c 1. + " + } + } + ); + IReadOnlyList errors = env.GetUsfmVersificationErrors(); + Assert.That(errors, Has.Count.EqualTo(2), JsonSerializer.Serialize(errors)); + Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.InvalidChapterNumber)); + Assert.That(errors[1].Type, Is.EqualTo(UsfmVersificationErrorType.MissingChapter)); + Assert.That(errors[0].ExpectedVerseRef, Is.Empty); + Assert.That(errors[1].ExpectedVerseRef, Is.EqualTo("3JN 1:15")); + Assert.That(errors[0].ActualVerseRef, Is.EqualTo("3JN 1.")); + Assert.That(errors[1].ActualVerseRef, Is.EqualTo("3JN -1:0")); + } + + [Test] + public void GetUsfmVersificationErrors_InvalidVerseNumber() + { + var env = new TestEnvironment( + files: new Dictionary() + { + { + "653JNTest.SFM", + @"\id 3JN + \c 1 + \v v1 + " + } + } + ); + IReadOnlyList errors = env.GetUsfmVersificationErrors(); + Assert.That(errors, Has.Count.EqualTo(2), JsonSerializer.Serialize(errors)); + Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.InvalidVerseNumber)); + Assert.That(errors[1].Type, Is.EqualTo(UsfmVersificationErrorType.MissingVerse)); + Assert.That(errors[0].ExpectedVerseRef, Is.Empty); + Assert.That(errors[1].ExpectedVerseRef, Is.EqualTo("3JN 1:15")); + Assert.That(errors[0].ActualVerseRef, Is.EqualTo("3JN 1:v1")); + Assert.That(errors[1].ActualVerseRef, Is.EqualTo("3JN 1:0")); + } + private class TestEnvironment(ParatextProjectSettings? settings = null, Dictionary? files = null) { public ParatextProjectVersificationErrorDetectorBase Detector { get; } =