Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 22 additions & 24 deletions lib/openstax/content/abl.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@ class OpenStax::Content::Abl
# If there are more than this number of archive versions still building, errors will happen
DEFAULT_MAX_ARCHIVE_ATTEMPTS = 5

attr_reader :partial_data

# Sets up a new instance.
#
# The partial-data flag (exposed through the partial_data reader) always starts out false.
#
# @param url [Object, nil] stored unchanged in @url; defaults to nil
def initialize(url: nil)
  @partial_data = false
  @url = url
end

def url
Expand Down Expand Up @@ -39,51 +42,46 @@ def books(archive: OpenStax::Content::Archive.new)
end
end

def each_book_with_previous_archive_version_fallback(max_attempts: DEFAULT_MAX_ARCHIVE_ATTEMPTS, &block)
def each_book_with_previous_archive_version_fallback(max_attempts: DEFAULT_MAX_ARCHIVE_ATTEMPTS, allow_partial_data: true, &block)
raise ArgumentError, 'no block given' if block.nil?
raise ArgumentError, 'given block must accept the book as its first argument' if block.arity == 0

books = OpenStax::Content::Abl.new.books
attempt = 1
@partial_data = false

until books.empty?
books.each do |book|
attempt = 1
previous_version = nil
previous_archive = nil
retry_books = []

books.each do |book|
while attempt <= max_attempts
begin
block.call book
break
rescue StandardError => exception
raise exception if attempt >= max_attempts

# Sometimes books in the latest archive fail to load (when the new version is still building)
# Retry with an earlier version of archive, if possible
previous_version ||= book.archive.previous_version

if previous_version.nil?
# There are no more earlier archive versions
raise exception
previous_version = book.archive.previous_version
if previous_version.nil? or attempt >= max_attempts
raise exception unless allow_partial_data
@partial_data = true
OpenStax::Content::logger.warn do
"Failed to process slugs for book: #{book.uuid}. " \
"Error: #{exception.class}: #{exception.message}"
end
break
else
previous_archive ||= OpenStax::Content::Archive.new version: previous_version

retry_book = OpenStax::Content::Book.new(
previous_archive = OpenStax::Content::Archive.new version: previous_version
book = OpenStax::Content::Book.new(
archive: previous_archive,
uuid: book.uuid,
version: book.version,
slug: book.slug,
min_code_version: book.min_code_version,
committed_at: book.committed_at
)

# If the book requires an archive version that hasn't finished building yet, don't include it
retry_books << retry_book if retry_book.valid?
break unless book.valid?
end
attempt += 1
end
end

books = retry_books
attempt += 1
end
end

Expand Down
162 changes: 162 additions & 0 deletions spec/openstax/content/abl_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,166 @@
# Checks the number of entries in the page-uuid => slug map built by the ABL.
it 'can return a map of all page slugs by uuid' do
  slug_map = abl.slugs_by_page_uuid

  expect(slug_map.size).to eq(24385)
end

# Only one archive version is stubbed, so the previous_version stub always yields nil.
# The first all_pages call raises; the second one returns fake pages. The example then
# checks that a warning is logged, results are still returned and partial_data becomes true.
it 'sets partial_data to true when a book fails to process', vcr: { allow_unused_http_interactions: true } do
  only_version = '20250522.165258'
  known_versions = [only_version]

  # Every archive reports the same fixed list of versions
  allow_any_instance_of(OpenStax::Content::Archive).to receive(:versions).and_wrap_original do |_original, *_args|
    known_versions
  end

  # previous_version is the entry just before the archive's own version in the list, if any
  allow_any_instance_of(OpenStax::Content::Archive).to receive(:previous_version).and_wrap_original do |original, *_args|
    position = known_versions.index(original.receiver.version)
    known_versions[position - 1] if position && position > 0
  end

  # The ABL lists two versions of the same test book, both on the single known archive
  allow_any_instance_of(OpenStax::Content::Abl).to receive(:books).and_wrap_original do |_original, *_args|
    shared_archive = OpenStax::Content::Archive.new(version: only_version)

    %w[0000000 0000001].map do |book_version|
      OpenStax::Content::Book.new(
        archive: shared_archive,
        uuid: '00000000-0000-0000-0000-000000000000',
        version: book_version,
        min_code_version: only_version,
        slug: 'test-book',
        committed_at: '2026-01-21T21:45:57+00:00'
      )
    end
  end

  # The very first all_pages call blows up; every later call returns two fake pages
  allow_any_instance_of(OpenStax::Content::Book).to receive(:all_pages).and_wrap_original do |_original, *_args|
    unless @first_book_processed
      @first_book_processed = true
      raise StandardError, 'Simulated archive error'
    end

    [
      OpenStruct.new(uuid: '11111111-1111-1111-1111-111111111111', slug: 'test-page-1'),
      OpenStruct.new(uuid: '22222222-2222-2222-2222-222222222222', slug: 'test-page-2')
    ]
  end

  # The failure must be reported through the logger
  expect(OpenStax::Content::logger).to receive(:warn).at_least(:once)
  # The flag is false before any processing happens
  expect(abl.partial_data).to be false

  slugs = abl.slugs_by_page_uuid

  # Pages from the book that succeeded are still returned
  expect(slugs).to be_a(Hash)
  expect(slugs).not_to be_empty

  # The skipped book is reflected in the flag
  expect(abl.partial_data).to be true
end

# Three archive versions are stubbed, so a book that always fails is expected to be tried
# once per version (3 all_pages calls) before it is skipped and partial_data is set to true.
it 'sets partial_data to true after exhausting all archive version retries' do
  # Oldest first; the books start out on the last (newest) entry
  known_versions = ['20250520.165258', '20250521.165258', '20250522.165258']
  allow_any_instance_of(OpenStax::Content::Archive).to receive(:versions).and_wrap_original do |_original, *_args|
    known_versions
  end

  # previous_version is the entry just before the archive's own version in the list, if any
  allow_any_instance_of(OpenStax::Content::Archive).to receive(:previous_version).and_wrap_original do |original, *_args|
    position = known_versions.index(original.receiver.version)
    known_versions[position - 1] if position && position > 0
  end

  # Two books with distinct UUIDs, so the failing one can be tracked separately
  allow_any_instance_of(OpenStax::Content::Abl).to receive(:books).and_wrap_original do |_original, *_args|
    newest_archive = OpenStax::Content::Archive.new(version: known_versions.last)
    slugs_by_uuid = {
      'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa' => 'failing-book',
      'bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb' => 'success-book'
    }

    slugs_by_uuid.map do |book_uuid, book_slug|
      OpenStax::Content::Book.new(
        archive: newest_archive,
        uuid: book_uuid,
        version: '1.0',
        min_code_version: known_versions.first,
        slug: book_slug,
        committed_at: '2026-01-21T21:45:57+00:00'
      )
    end
  end

  # Number of all_pages calls seen so far, keyed by book uuid
  attempt_count = {}

  # 'failing-book' raises on every archive version; any other book returns fake pages
  allow_any_instance_of(OpenStax::Content::Book).to receive(:all_pages).and_wrap_original do |original, *args, &_block|
    # The book may arrive as the first argument; otherwise use the wrapped method's receiver
    book = if args[0].is_a?(OpenStax::Content::Book)
      args[0]
    else
      original.receiver
    end

    unless book.slug == 'failing-book'
      next [
        OpenStruct.new(uuid: 'cccccccc-cccc-cccc-cccc-cccccccccccc', slug: 'success-page-1'),
        OpenStruct.new(uuid: 'dddddddd-dddd-dddd-dddd-dddddddddddd', slug: 'success-page-2')
      ]
    end

    attempt_count[book.uuid] = attempt_count.fetch(book.uuid, 0) + 1
    raise StandardError, "Simulated failure for attempt #{attempt_count[book.uuid]}"
  end

  # The failure must be reported through the logger
  expect(OpenStax::Content::logger).to receive(:warn).at_least(:once)

  # The flag is false before any processing happens
  expect(abl.partial_data).to be false

  slugs = abl.slugs_by_page_uuid

  # Pages from the successful book are still present
  expect(slugs).to be_a(Hash)
  expect(slugs).not_to be_empty
  expect(slugs['cccccccc-cccc-cccc-cccc-cccccccccccc']).not_to be_nil

  # One attempt per stubbed archive version (initial try plus fallbacks)
  expect(attempt_count['aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa']).to eq(3)
  expect(abl.partial_data).to be true
end

# With allow_partial_data: false, the error raised while processing a book is expected
# to propagate to the caller.
it 'raises exception when allow_partial_data is false and a book fails' do
  only_version = '20250522.165258'

  # A single archive version is reported by every archive
  allow_any_instance_of(OpenStax::Content::Archive).to receive(:versions).and_wrap_original do |_original, *_args|
    [only_version]
  end

  # The ABL lists exactly one book
  allow_any_instance_of(OpenStax::Content::Abl).to receive(:books).and_wrap_original do |_original, *_args|
    sole_archive = OpenStax::Content::Archive.new(version: only_version)
    sole_book = OpenStax::Content::Book.new(
      archive: sole_archive,
      uuid: 'eeeeeeee-eeee-eeee-eeee-eeeeeeeeeeee',
      version: '1.0',
      min_code_version: only_version,
      slug: 'failing-book',
      committed_at: '2026-01-21T21:45:57+00:00'
    )

    [sole_book]
  end

  # all_pages fails for every book instance
  allow_any_instance_of(OpenStax::Content::Book).to receive(:all_pages).and_raise(StandardError, 'Test error')

  # The stubbed error must surface instead of being logged and skipped
  expect {
    abl.each_book_with_previous_archive_version_fallback(allow_partial_data: false) { |book| book.all_pages }
  }.to raise_error(StandardError, 'Test error')
end
end