diff --git a/.github/workflows/_test-code-samples.yml b/.github/workflows/_test-code-samples.yml index ca03e5b8..81e76f0b 100644 --- a/.github/workflows/_test-code-samples.yml +++ b/.github/workflows/_test-code-samples.yml @@ -4,6 +4,18 @@ on: workflow_call: workflow_dispatch: +env: + MINDEE_ENDPOINT_SE_TESTS: ${{ secrets.MINDEE_ENDPOINT_SE_TESTS }} + MINDEE_ACCOUNT_SE_TESTS: ${{ secrets.MINDEE_ACCOUNT_SE_TESTS }} + MINDEE_API_KEY_SE_TESTS: ${{ secrets.MINDEE_API_KEY_SE_TESTS }} + MINDEE_V2_API_KEY: ${{ secrets.MINDEE_V2_SE_TESTS_API_KEY }} + MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }} + MINDEE_V2_SE_TESTS_FAILURE_WEBHOOK_ID: ${{ secrets.MINDEE_V2_SE_TESTS_FAILURE_WEBHOOK_ID }} + MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID }} + MINDEE_V2_SE_TESTS_CROP_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_CROP_MODEL_ID }} + MINDEE_V2_SE_TESTS_OCR_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_OCR_MODEL_ID }} + MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID }} + jobs: test: name: Run Tests @@ -28,8 +40,14 @@ jobs: ruby-version: ${{ matrix.ruby }} bundler-cache: true - - name: Tests code samples + - name: Tests V1 code samples + env: + MINDEE_LOG_LEVEL: DEBUG + run: | + ./spec/test_code_samples_v1.sh + + - name: Tests V2 code samples env: MINDEE_LOG_LEVEL: DEBUG run: | - ./spec/test_code_samples.sh ${{ secrets.MINDEE_ACCOUNT_SE_TESTS }} ${{ secrets.MINDEE_ENDPOINT_SE_TESTS }} ${{ secrets.MINDEE_API_KEY_SE_TESTS }} ${{ secrets.MINDEE_V2_SE_TESTS_API_KEY }} ${{ secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }} + ./spec/test_code_samples_v2.sh diff --git a/.github/workflows/_test-integrations.yml b/.github/workflows/_test-integrations.yml index 72ad67cd..75a72a71 100644 --- a/.github/workflows/_test-integrations.yml +++ b/.github/workflows/_test-integrations.yml @@ -20,7 +20,7 @@ jobs: - "macos-latest" ruby: - "3.0" - - "3.4" + - "4.0" steps: - uses: 
actions/checkout@v4 with: @@ -56,6 +56,10 @@ jobs: MINDEE_V2_API_KEY: ${{ secrets.MINDEE_V2_SE_TESTS_API_KEY }} MINDEE_V2_FINDOC_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }} MINDEE_V2_SE_TESTS_BLANK_PDF_URL: ${{ secrets.MINDEE_V2_SE_TESTS_BLANK_PDF_URL }} + MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID }} + MINDEE_V2_SE_TESTS_CROP_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_CROP_MODEL_ID }} + MINDEE_V2_SE_TESTS_OCR_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_OCR_MODEL_ID }} + MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID }} MINDEE_LOG_LEVEL: DEBUG run: | bundle exec rake integration diff --git a/.github/workflows/_test-units.yml b/.github/workflows/_test-units.yml index a78ce8de..1b3859e7 100644 --- a/.github/workflows/_test-units.yml +++ b/.github/workflows/_test-units.yml @@ -23,6 +23,7 @@ jobs: - "3.2" - "3.3" - "3.4" + - "4.0" steps: - uses: actions/checkout@v4 with: diff --git a/.gitignore b/.gitignore index 01872a55..d37f6b50 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,8 @@ /test/version_tmp/ /tmp/ _test.rb +_test_v1.rb +_test_v2.rb /vendor /mindee-*/ local-test diff --git a/.rubocop.yml b/.rubocop.yml index 7b56d374..fbb91a84 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -6,7 +6,7 @@ AllCops: - 'tmp/**/*' - '.git/**/*' - 'bin/*' - - _test.rb + - _test*.rb - local_test/* - Steepfile diff --git a/docs/code_samples/v2_classification.txt b/docs/code_samples/v2_classification.txt new file mode 100644 index 00000000..12bcd4dc --- /dev/null +++ b/docs/code_samples/v2_classification.txt @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +require 'mindee' +require 'mindee/v2/product' + +input_path = '/path/to/the/file.ext' +api_key = 'MY_API_KEY' +model_id = 'MY_MODEL_ID' + +# Init a new client +mindee_client = Mindee::ClientV2.new(api_key: api_key) + +# Set inference parameters +classification_params = { + # ID of the model, required. 
+ model_id: model_id, +} + +# Load a file from disk +input_source = Mindee::Input::Source::PathInputSource.new(input_path) + +# Send for processing +response = mindee_client.enqueue_and_get_result( + Mindee::V2::Product::Classification::Classification, + input_source, + classification_params +) + +# Access the classification result +puts response.inference.result.classification diff --git a/docs/code_samples/v2_crop.txt b/docs/code_samples/v2_crop.txt new file mode 100644 index 00000000..bd4e6e57 --- /dev/null +++ b/docs/code_samples/v2_crop.txt @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +require 'mindee' +require 'mindee/v2/product' + +input_path = '/path/to/the/file.ext' +api_key = 'MY_API_KEY' +model_id = 'MY_MODEL_ID' + +# Init a new client +mindee_client = Mindee::ClientV2.new(api_key: api_key) + +# Set inference parameters +crop_params = { + # ID of the model, required. + model_id: model_id, +} + +# Load a file from disk +input_source = Mindee::Input::Source::PathInputSource.new(input_path) + +# Send for processing +response = mindee_client.enqueue_and_get_result( + Mindee::V2::Product::Crop::Crop, + input_source, + crop_params +) + +# Access the result crops +puts response.inference.result.crops \ No newline at end of file diff --git a/docs/code_samples/default_v2.txt b/docs/code_samples/v2_extraction.txt similarity index 82% rename from docs/code_samples/default_v2.txt rename to docs/code_samples/v2_extraction.txt index 366c53cf..3c9d57e2 100644 --- a/docs/code_samples/default_v2.txt +++ b/docs/code_samples/v2_extraction.txt @@ -1,6 +1,7 @@ # frozen_string_literal: true require 'mindee' +require 'mindee/v2/product' input_path = '/path/to/the/file.ext' api_key = 'MY_API_KEY' @@ -10,9 +11,9 @@ model_id = 'MY_MODEL_ID' mindee_client = Mindee::ClientV2.new(api_key: api_key) # Set inference parameters -inference_params = Mindee::Input::InferenceParameters.new( +inference_params = { # ID of the model, required. 
- model_id, + model_id: model_id, # Options: set to `true` or `false` to override defaults @@ -25,15 +26,16 @@ inference_params = Mindee::Input::InferenceParameters.new( # Boost the precision and accuracy of all extractions. # Calculate confidence scores for all fields. confidence: nil -) +} # Load a file from disk input_source = Mindee::Input::Source::PathInputSource.new(input_path) # Send for processing -response = mindee_client.enqueue_and_get_inference( +response = mindee_client.enqueue_and_get_result( + Mindee::V2::Product::Extraction::Extraction, input_source, - inference_params # This parameter can also be provided as a Hash. + inference_params ) # Print a brief summary of the parsed data diff --git a/docs/code_samples/v2_extraction_webhook.txt b/docs/code_samples/v2_extraction_webhook.txt new file mode 100644 index 00000000..a68f31eb --- /dev/null +++ b/docs/code_samples/v2_extraction_webhook.txt @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +require 'mindee' +require 'mindee/v2/product' + +input_path = '/path/to/the/file.ext' +api_key = 'MY_API_KEY' +model_id = 'MY_MODEL_ID' + +# Init a new client +mindee_client = Mindee::ClientV2.new(api_key: api_key) + +inference_params = { + # ID of the model, required. + model_id: model_id, + # Add any number of webhook IDs here. + webhook_ids: ['MY_WEBHOOK_ID'], + + # Options: set to `true` or `false` to override defaults + + # Enhance extraction accuracy with Retrieval-Augmented Generation. + rag: nil, + # Extract the full text content from the document as strings. + raw_text: nil, + # Calculate bounding box polygons for all fields. + polygon: nil, + # Boost the precision and accuracy of all extractions. + # Calculate confidence scores for all fields. 
+ confidence: nil +} + +# Load a file from disk +input_source = Mindee::Input::Source::PathInputSource.new(input_path) + +# Send for processing +response = mindee_client.enqueue( + Mindee::V2::Product::Extraction::Extraction, + input_source, + inference_params +) + +# Print the job ID +job_id = response.job.id + +puts job_id diff --git a/docs/code_samples/v2_ocr.txt b/docs/code_samples/v2_ocr.txt new file mode 100644 index 00000000..3a7d9250 --- /dev/null +++ b/docs/code_samples/v2_ocr.txt @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +require 'mindee' +require 'mindee/v2/product' + +input_path = '/path/to/the/file.ext' +api_key = 'MY_API_KEY' +model_id = 'MY_MODEL_ID' + +# Init a new client +mindee_client = Mindee::ClientV2.new(api_key: api_key) + +# Set inference parameters +ocr_params = { + # ID of the model, required. + model_id: model_id, +} + +# Load a file from disk +input_source = Mindee::Input::Source::PathInputSource.new(input_path) + +# Send for processing +response = mindee_client.enqueue_and_get_result( + Mindee::V2::Product::Ocr::Ocr, + input_source, + ocr_params +) + +# Access the result OCR pages +puts response.inference.result.pages diff --git a/docs/code_samples/v2_split.txt b/docs/code_samples/v2_split.txt new file mode 100644 index 00000000..e599d927 --- /dev/null +++ b/docs/code_samples/v2_split.txt @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +require 'mindee' +require 'mindee/v2/product' + +input_path = '/path/to/the/file.ext' +api_key = 'MY_API_KEY' +model_id = 'MY_MODEL_ID' + +# Init a new client +mindee_client = Mindee::ClientV2.new(api_key: api_key) + +# Set inference parameters +split_params = { + # ID of the model, required. 
+ model_id: model_id, +} + +# Load a file from disk +input_source = Mindee::Input::Source::PathInputSource.new(input_path) + +# Send for processing +response = mindee_client.enqueue_and_get_result( + Mindee::V2::Product::Split::Split, + input_source, + split_params +) + +# Access the result splits +puts response.inference.result.splits diff --git a/lib/mindee.rb b/lib/mindee.rb index 05378eb4..d05a9420 100644 --- a/lib/mindee.rb +++ b/lib/mindee.rb @@ -84,6 +84,13 @@ module IND module US end end + + # V2-specific module. + module V2 + # Product-specific module. + module Product + end + end end # Shorthand to call the logger from anywhere. diff --git a/lib/mindee/client_v2.rb b/lib/mindee/client_v2.rb index f70be6a4..39def57c 100644 --- a/lib/mindee/client_v2.rb +++ b/lib/mindee/client_v2.rb @@ -26,8 +26,16 @@ def get_inference(inference_id) @mindee_api.req_get_inference(inference_id) end - # Retrieves an inference. - # @param job_id [String] + # Retrieves a result from a given queue or URL to the result. + # @param product [Class] The return class. + # @param resource [String] ID of the inference or URL to the result. + # @return [Mindee::Parsing::V2::BaseResponse] + def get_result(product, resource) + @mindee_api.req_get_result(product, resource) + end + + # Retrieves an inference from a given queue or URL to the job. + # @param job_id [String] ID of the job. # @return [Mindee::Parsing::V2::JobResponse] def get_job(job_id) @mindee_api.req_get_job(job_id) @@ -38,22 +46,47 @@ def get_job(job_id) # The source of the input document (local file or URL). 
# @param params [Hash, InferenceParameters] # @return [Mindee::Parsing::V2::JobResponse] - def enqueue_inference(input_source, params) - normalized_params = normalize_inference_parameters(params) + def enqueue_inference(input_source, params, disable_redundant_warnings: false) + unless disable_redundant_warnings + warn '[DEPRECATION] `enqueue_inference` is deprecated; use `enqueue` instead.', uplevel: 1 + end + normalized_params = normalize_parameters(Input::InferenceParameters, params) + enqueue(Mindee::Parsing::V2::Inference, input_source, normalized_params) + end + + # Enqueue a document for async parsing. + # @param product [Class] The return class. + # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource] + # The source of the input document (local file or URL). + # @param params [Hash, InferenceParameters] Parameters for the inference. + # @return [Mindee::Parsing::V2::JobResponse] + def enqueue( + product, + input_source, + params + ) + normalized_params = normalize_parameters(product.params_type, params) + normalized_params.validate_async_params logger.debug("Enqueueing document to model '#{normalized_params.model_id}'.") - @mindee_api.req_post_inference_enqueue(input_source, normalized_params) + @mindee_api.req_post_enqueue(input_source, normalized_params) end - # Enqueue a document for async parsing and automatically try to retrieve it. + # Enqueues to an asynchronous endpoint and automatically polls for a response. + # + # @param product [Class] The return class. # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource] # The source of the input document (local file or URL). # @param params [Hash, InferenceParameters] Parameters for the inference. 
- # @return [Mindee::Parsing::V2::InferenceResponse] - def enqueue_and_get_inference(input_source, params) - normalized_params = normalize_inference_parameters(params) + # @return [Mindee::Parsing::Common::ApiResponse] + def enqueue_and_get_result( + product, + input_source, + params + ) + enqueue_response = enqueue(product, input_source, params) + normalized_params = normalize_parameters(product.params_type, params) normalized_params.validate_async_params - enqueue_response = enqueue_inference(input_source, normalized_params) if enqueue_response.job.id.nil? || enqueue_response.job.id.empty? logger.error("Failed enqueueing:\n#{enqueue_response.raw_http}") @@ -70,8 +103,8 @@ def enqueue_and_get_inference(input_source, params) while retry_counter < normalized_params.polling_options.max_retries if poll_results.job.status == 'Failed' break - elsif poll_results.job.status == 'Processed' - return get_inference(poll_results.job.id) + elsif !poll_results.job.result_url.nil? + return get_result(product, poll_results.job.result_url) end logger.debug( @@ -97,13 +130,32 @@ def enqueue_and_get_inference(input_source, params) "Asynchronous parsing request timed out after #{sec_count} seconds" end + # Enqueue a document for async parsing and automatically try to retrieve it. + # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource] + # The source of the input document (local file or URL). + # @param params [Hash, InferenceParameters] Parameters for the inference. 
+ # @return [Mindee::Parsing::V2::InferenceResponse] + def enqueue_and_get_inference(input_source, params, disable_redundant_warnings: false) + unless disable_redundant_warnings + warn '[DEPRECATION] `enqueue_and_get_inference` is deprecated; use `enqueue_and_get_result` instead.', + uplevel: 1 + end + + response = enqueue_and_get_result(Mindee::Parsing::V2::Inference, input_source, params) + unless response.is_a?(Mindee::Parsing::V2::InferenceResponse) + raise TypeError, "Invalid response type \"#{response.class}\"" + end + + response + end + # If needed, converts the parsing options provided as a hash into a proper InferenceParameters object. - # @param params [Hash, InferenceParameters] Params. - # @return [InferenceParameters] - def normalize_inference_parameters(params) - return params if params.is_a?(Input::InferenceParameters) + # @param params [Hash, Class] Params. + # @return [BaseParameters] + def normalize_parameters(param_class, params) + return param_class.from_hash(params: params) if params.is_a?(Hash) - Input::InferenceParameters.from_hash(params: params) + params end end end diff --git a/lib/mindee/errors/mindee_http_error_v2.rb b/lib/mindee/errors/mindee_http_error_v2.rb index 2cde4df5..90af794a 100644 --- a/lib/mindee/errors/mindee_http_error_v2.rb +++ b/lib/mindee/errors/mindee_http_error_v2.rb @@ -18,7 +18,7 @@ class MindeeHTTPErrorV2 < MindeeError # @return [Array] A list of explicit error details. 
attr_reader :errors - # @param http_error [Hash, Parsing::V2::ErrorResponse] + # @param http_error [Hash, Mindee::Parsing::V2::ErrorResponse] def initialize(http_error) if http_error.is_a?(Parsing::V2::ErrorResponse) http_error = { 'detail' => http_error.detail, @@ -33,7 +33,7 @@ def initialize(http_error) @code = http_error['code'] @errors = if http_error.key?('errors') http_error['errors'].map do |error| - Parsing::V2::ErrorItem.new(error) + Mindee::Parsing::V2::ErrorItem.new(error) end else [] diff --git a/lib/mindee/geometry/point.rb b/lib/mindee/geometry/point.rb index 18ff2135..3bc98e9a 100644 --- a/lib/mindee/geometry/point.rb +++ b/lib/mindee/geometry/point.rb @@ -31,6 +31,11 @@ def [](key) throw '0 or 1 only' end end + + # @return [String] Point as a string. + def to_s + "(#{@x},#{@y})" + end end end end diff --git a/lib/mindee/geometry/polygon.rb b/lib/mindee/geometry/polygon.rb index 28c7f446..e2e2b1ad 100644 --- a/lib/mindee/geometry/polygon.rb +++ b/lib/mindee/geometry/polygon.rb @@ -27,6 +27,11 @@ def point_in_y?(point) min_max = Geometry.get_min_max_y(self) point.y.between?(min_max.min, min_max.max) end + + # @return [String] Polygon as a string. + def to_s + "(#{map(&:to_s).join(', ')})" + end end end end diff --git a/lib/mindee/http/mindee_api_v2.rb b/lib/mindee/http/mindee_api_v2.rb index 8465f03d..1c1d4b7e 100644 --- a/lib/mindee/http/mindee_api_v2.rb +++ b/lib/mindee/http/mindee_api_v2.rb @@ -20,16 +20,16 @@ def initialize(api_key: nil) # Sends a file to the inference queue. 
# # @param input_source [Input::Source::LocalInputSource, Input::Source::URLInputSource] - # @param params [Input::InferenceParameters] + # @param params [Input::BaseParameters] # @return [Mindee::Parsing::V2::JobResponse] # @raise [Mindee::Errors::MindeeHttpErrorV2] - def req_post_inference_enqueue(input_source, params) + def req_post_enqueue(input_source, params) @settings.check_api_key response = enqueue( input_source, params ) - Parsing::V2::JobResponse.new(process_response(response)) + Mindee::Parsing::V2::JobResponse.new(process_response(response)) end # Retrieves a queued inference. @@ -37,27 +37,69 @@ def req_post_inference_enqueue(input_source, params) # @param inference_id [String] # @return [Mindee::Parsing::V2::InferenceResponse] def req_get_inference(inference_id) + req_get_result(Parsing::V2::Inference, inference_id) + end + + # Retrieves a result from a given queue. + # @param product [Class] The return class. + # @param resource [String] ID of the inference or URL to the result. + # @return [Mindee::Parsing::V2::BaseResponse] + def req_get_result(product, resource) + return req_get_result_url(product.response_type, resource) if uri?(resource) + @settings.check_api_key - response = inference_result_req_get( - inference_id + response = result_req_get( + resource, + product ) - Parsing::V2::InferenceResponse.new(process_response(response)) + product.response_type.new(process_response(response)) end # Retrieves a queued job. # - # @param job_id [String] + # @param job_id [String] ID of the job or URL to the job. # @return [Mindee::Parsing::V2::JobResponse] def req_get_job(job_id) @settings.check_api_key response = inference_job_req_get( job_id ) - Parsing::V2::JobResponse.new(process_response(response)) + Mindee::Parsing::V2::JobResponse.new(process_response(response)) end private + # Retrieves a queued job. 
+ # + # @param url [String] + # @return [Mindee::Parsing::V2::JobResponse] + def req_get_job_url(url) + @settings.check_api_key + response = poll(url) + Mindee::Parsing::V2::JobResponse.new(process_response(response)) + end + + # Retrieves a queued job. + # + # @param result_class [Mindee::V2::Parsing::BaseResponse] + # @param url [String] + # @return [Mindee::Parsing::V2::JobResponse] + def req_get_result_url(result_class, url) + @settings.check_api_key + response = poll(url) + result_class.new(process_response(response)) + end + + # @param resource [String] Resource to check. + # @return [Boolean] + def uri?(resource) + uri = URI.parse(resource) + throw Mindee::Errors::MindeeError, 'HTTP is not supported.' if uri.scheme == 'http' + uri.scheme == 'https' + rescue URI::BadURIError, URI::InvalidURIError + false + end + # Converts an HTTP response to a parsed response object. # # @param response [Net::HTTPResponse, nil] @@ -111,6 +153,15 @@ def inference_result_req_get(queue_id) poll("#{@settings.base_url}/inferences/#{queue_id}") end + # Polls the API for the result of an inference. + # + # @param queue_id [String] ID of the queue. + # @param product [Class] The return class. + # @return [Net::HTTPResponse] + def result_req_get(queue_id, product) + poll("#{@settings.base_url}/products/#{product.slug}/results/#{queue_id}") + end + # Handle parameters for the enqueue form # @param form_data [Array] Array of form fields # @param params [Input::InferenceParameters] Inference options. @@ -130,10 +181,10 @@ def enqueue_form_options(form_data, params) end # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource] - # @param params [Input::InferenceParameters] Inference options. + # @param params [Input::BaseParameters] Inference options. 
# @return [Net::HTTPResponse, nil] def enqueue(input_source, params) - uri = URI("#{@settings.base_url}/inferences/enqueue") + uri = URI("#{@settings.base_url}/products/#{params.slug}/enqueue") form_data = if input_source.is_a?(Mindee::Input::Source::URLInputSource) [['url', input_source.url]] # : Array[untyped] @@ -143,8 +194,7 @@ def enqueue(input_source, params) end form_data.push(['model_id', params.model_id]) - # deal with other parameters - form_data = enqueue_form_options(form_data, params) + form_data = params.append_form_data(form_data) headers = { 'Authorization' => @settings.api_key, diff --git a/lib/mindee/input/base_parameters.rb b/lib/mindee/input/base_parameters.rb new file mode 100644 index 00000000..680353e5 --- /dev/null +++ b/lib/mindee/input/base_parameters.rb @@ -0,0 +1,146 @@ +# frozen_string_literal: true + +module Mindee + module Input + # Base class for parameters accepted by all V2 endpoints. + class BaseParameters + # @return [String] ID of the model (required). + attr_reader :model_id + + # @return [String, nil] Optional alias for the file. + attr_reader :file_alias + + # @return [Array, nil] Optional list of Webhooks IDs to propagate the API response to. + attr_reader :webhook_ids + + # @return [PollingOptions] Options for polling. Set only if having timeout issues. + attr_reader :polling_options + + # @return [Boolean, nil] Whether to close the file after parsing. + attr_reader :close_file + + # @param [String] model_id ID of the model + # @param [String, nil] file_alias File alias, if applicable. + # @param [Array, nil] webhook_ids List of webhook IDs to propagate the API response to. + # @param [Hash, nil] polling_options Options for polling. Set only if having timeout issues. + # @param [Boolean, nil] close_file Whether to close the file after parsing. + def initialize( + model_id, + file_alias: nil, + webhook_ids: nil, + polling_options: nil, + close_file: true + ) + raise Errors::MindeeInputError, 'Model ID is required.' 
if model_id.empty? || model_id.nil? + + @model_id = model_id + @file_alias = file_alias + @webhook_ids = webhook_ids || [] + @polling_options = get_clean_polling_options(polling_options) + @close_file = close_file.nil? || close_file + end + + # @return [String] Slug for the endpoint. + def self.slug + if self == BaseParameters + raise NotImplementedError, 'Cannot access `slug` directly on the BaseParameters class.' + end + + '' + end + + # @return [String] Slug for the endpoint. + def slug + self.class.slug + end + + def self.from_hash(params: {}) + load_from_hash(params: params) + new( + params[:model_id], + file_alias: params[:file_alias], + webhook_ids: params[:webhook_ids], + polling_options: params[:polling_options], + close_file: params[:close_file] + ) + end + + # Loads a prediction from a Hash. + # @param [Hash] params Parameters to provide as a hash. + # @return [Hash] + def self.load_from_hash(params: {}) + params.transform_keys!(&:to_sym) + + if params.empty? || params[:model_id].nil? || params[:model_id].empty? + raise Errors::MindeeInputError, 'Model ID is required.' + end + + polling_options_input = params.fetch(:page_options, PollingOptions.new) + if polling_options_input.is_a?(Hash) + polling_options_input = polling_options_input.transform_keys(&:to_sym) + PollingOptions.new( + initial_delay_sec: polling_options_input.fetch(:initial_delay_sec, 2.0), + delay_sec: polling_options_input.fetch(:delay_sec, 1.5), + max_retries: polling_options_input.fetch(:max_retries, 80) + ) + end + params + end + + # Appends base form data to the provided array. + # @param [Array] form_data Array of form fields + # @return [Array] + def append_form_data(form_data) + form_data.push(['file_alias', @file_alias]) if @file_alias + webhook_ids = @webhook_ids || [] + form_data.push(['webhook_ids', webhook_ids.join(',')]) unless @webhook_ids.nil? || webhook_ids.empty? 
+ form_data + end + + # Validates the parameters for async auto-polling + def validate_async_params + min_delay_sec = 1 + min_initial_delay_sec = 1 + min_retries = 2 + + if @polling_options.delay_sec < min_delay_sec + raise ArgumentError, + "Cannot set auto-poll delay to less than #{min_delay_sec} second(s)" + end + if @polling_options.initial_delay_sec < min_initial_delay_sec + raise ArgumentError, + "Cannot set initial parsing delay to less than #{min_initial_delay_sec} second(s)" + end + return unless @polling_options.max_retries < min_retries + + raise ArgumentError, + "Cannot set auto-poll retries to less than #{min_retries}" + end + + private + + # Cleans a proper polling options object potentially from a hash. + # @param [Hash, PollingOptions, nil] polling_options Polling options. + # @return [PollingOptions] Valid polling options object. + def get_clean_polling_options(polling_options) + return PollingOptions.new if polling_options.is_a?(PollingOptions) + + if polling_options.is_a?(Hash) + polling_options = polling_options.transform_keys(&:to_sym) + output_polling_options = PollingOptions.new( + initial_delay_sec: polling_options.fetch(:initial_delay_sec, 2.0), + delay_sec: polling_options.fetch(:delay_sec, 1.5), + max_retries: polling_options.fetch(:max_retries, 80) + ) + else + output_polling_options = if polling_options.is_a?(PollingOptions) + polling_options || PollingOptions.new + else + PollingOptions.new + end + end + output_polling_options + end + end + end +end diff --git a/lib/mindee/input/inference_parameters.rb b/lib/mindee/input/inference_parameters.rb index 1a38fca4..a377d052 100644 --- a/lib/mindee/input/inference_parameters.rb +++ b/lib/mindee/input/inference_parameters.rb @@ -1,14 +1,12 @@ # frozen_string_literal: true require_relative 'data_schema' +require_relative '../input/base_parameters' module Mindee module Input # Parameters to set when sending a file for inference. 
- class InferenceParameters - # @return [String] ID of the model (required). - attr_reader :model_id - + class InferenceParameters < Mindee::Input::BaseParameters # @return [Boolean, nil] Enhance extraction accuracy with Retrieval-Augmented Generation. attr_reader :rag @@ -24,35 +22,30 @@ class InferenceParameters # Calculate confidence scores for all fields, and fill their confidence attribute. attr_reader :confidence - # @return [String, nil] Optional alias for the file. - attr_reader :file_alias - # @return [String, nil] Additional text context used by the model during inference. # Not recommended, for specific use only. attr_reader :text_context - # @return [Array, nil] Optional list of Webhooks IDs to propagate the API response to. - attr_reader :webhook_ids - - # @return [PollingOptions] Options for polling. Set only if having timeout issues. - attr_reader :polling_options - # @return [DataSchemaField] attr_reader :data_schema - # @return [Boolean, nil] Whether to close the file after parsing. - attr_reader :close_file + # @return [String] Slug for the endpoint. + def self.slug + 'extraction' + end # rubocop:disable Metrics/ParameterLists # @param [String] model_id ID of the model - # @param [nil] rag Whether to enable RAG. - # @param [nil] raw_text Whether to enable rax text. - # @param [nil] polygon Whether to enable polygons. - # @param [nil] confidence Whether to enable confidence scores. - # @param [nil] file_alias File alias, if applicable. - # @param [nil] webhook_ids - # @param [nil] polling_options - # @param [TrueClass] close_file + # @param [Boolean, nil] rag Whether to enable RAG. + # @param [Boolean, nil] raw_text Whether to enable raw text. + # @param [Boolean, nil] polygon Whether to enable polygons. + # @param [Boolean, nil] confidence Whether to enable confidence scores. + # @param [String, nil] file_alias File alias, if applicable. 
+ # @param [Array, nil] webhook_ids + # @param [String, nil] text_context + # @param [Hash, nil] polling_options + # @param [Boolean, nil] close_file + # @param [DataSchemaField, String, Hash, nil] data_schema def initialize( model_id, rag: nil, @@ -66,96 +59,60 @@ def initialize( close_file: true, data_schema: nil ) - raise Errors::MindeeInputError, 'Model ID is required.' if model_id.empty? || model_id.nil? + super( + model_id, + file_alias: file_alias, + webhook_ids: webhook_ids, + polling_options: polling_options, + close_file: close_file + ) - @model_id = model_id @rag = rag @raw_text = raw_text @polygon = polygon @confidence = confidence - @file_alias = file_alias - @webhook_ids = webhook_ids || [] @text_context = text_context - @polling_options = get_clean_polling_options(polling_options) - @close_file = close_file.nil? || close_file @data_schema = DataSchema.new(data_schema) unless data_schema.nil? # rubocop:enable Metrics/ParameterLists end - # Validates the parameters for async auto-polling - def validate_async_params - min_delay_sec = 1 - min_initial_delay_sec = 1 - min_retries = 2 + # Appends inference-specific form data to the provided array. + # @param [Array] form_data Array of form fields + # @return [Array] + def append_form_data(form_data) + new_form_data = super - if @polling_options.delay_sec < min_delay_sec - raise ArgumentError, - "Cannot set auto-poll delay to less than #{min_delay_sec} second(s)" - end - if @polling_options.initial_delay_sec < min_initial_delay_sec - raise ArgumentError, - "Cannot set initial parsing delay to less than #{min_initial_delay_sec} second(s)" - end - return unless @polling_options.max_retries < min_retries + new_form_data.push(['rag', @rag.to_s]) unless @rag.nil? + new_form_data.push(['raw_text', @raw_text.to_s]) unless @raw_text.nil? + new_form_data.push(['polygon', @polygon.to_s]) unless @polygon.nil? + new_form_data.push(['confidence', @confidence.to_s]) unless @confidence.nil? 
+ new_form_data.push(['text_context', @text_context]) if @text_context + new_form_data.push(['data_schema', @data_schema.to_s]) if @data_schema - raise ArgumentError, - "Cannot set auto-poll retries to less than #{min_retries}" + new_form_data end # Loads a prediction from a Hash. # @param [Hash] params Parameters to provide as a hash. # @return [InferenceParameters] def self.from_hash(params: {}) - params.transform_keys!(&:to_sym) - - if params.empty? || params[:model_id].nil? || params[:model_id].empty? - raise Errors::MindeeInputError, 'Model ID is required.' - end - - model_id = params.fetch(:model_id) rag = params.fetch(:rag, nil) raw_text = params.fetch(:raw_text, nil) polygon = params.fetch(:polygon, nil) confidence = params.fetch(:confidence, nil) - file_alias = params.fetch(:file_alias, nil) - webhook_ids = params.fetch(:webhook_ids, []) - polling_options_input = params.fetch(:page_options, PollingOptions.new) - if polling_options_input.is_a?(Hash) - polling_options_input = polling_options_input.transform_keys(&:to_sym) - PollingOptions.new( - initial_delay_sec: polling_options_input.fetch(:initial_delay_sec, 2.0), - delay_sec: polling_options_input.fetch(:delay_sec, 1.5), - max_retries: polling_options_input.fetch(:max_retries, 80) - ) - end - close_file = params.fetch(:close_file, true) - InferenceParameters.new(model_id, rag: rag, raw_text: raw_text, polygon: polygon, confidence: confidence, - file_alias: file_alias, webhook_ids: webhook_ids, close_file: close_file) - end - - private - - # Cleans a proper polling options object potentially from a hash. - # @param [Hash, PollingOptions, nil] polling_options Polling options. - # @return [PollingOptions] Valid polling options object. 
- def get_clean_polling_options(polling_options) - return PollingOptions.new if polling_options.is_a?(PollingOptions) - - if polling_options.is_a?(Hash) - polling_options = polling_options.transform_keys(&:to_sym) - output_polling_options = PollingOptions.new( - initial_delay_sec: polling_options.fetch(:initial_delay_sec, 2.0), - delay_sec: polling_options.fetch(:delay_sec, 1.5), - max_retries: polling_options.fetch(:max_retries, 80) - ) - else - output_polling_options = if polling_options.is_a?(PollingOptions) - polling_options || PollingOptions.new - else - PollingOptions.new - end - end - output_polling_options + base_params = load_from_hash(params: params) + new_params = base_params.merge(rag: rag, raw_text: raw_text, polygon: polygon, confidence: confidence) + model_id = new_params.fetch(:model_id) + + InferenceParameters.new( + model_id, rag: rag, + raw_text: raw_text, + polygon: polygon, + confidence: confidence, + file_alias: params.fetch(:file_alias, nil), + webhook_ids: params.fetch(:webhook_ids, nil), + close_file: params.fetch(:close_file, true) + ) end end end diff --git a/lib/mindee/input/local_response.rb b/lib/mindee/input/local_response.rb index ca8988ab..5e6c1440 100644 --- a/lib/mindee/input/local_response.rb +++ b/lib/mindee/input/local_response.rb @@ -35,7 +35,7 @@ def initialize(input_file) # @return [Hash] def as_hash @file.rewind - file_str = @file.read + file_str = @file.read or raise 'File could not be read' JSON.parse(file_str, object_class: Hash) rescue JSON::ParserError raise Errors::MindeeInputError, "File is not a valid dict. 
#{file_str}" @@ -54,7 +54,8 @@ def get_hmac_signature(secret_key) algorithm = OpenSSL::Digest.new('sha256') begin @file.rewind - mac = OpenSSL::HMAC.hexdigest(algorithm, self.class.process_secret_key(secret_key), @file.read) + mac = OpenSSL::HMAC.hexdigest(algorithm, self.class.process_secret_key(secret_key), + @file.read || raise('File could not be read')) rescue StandardError raise Errors::MindeeInputError, 'Could not get HMAC signature from payload.' end @@ -70,9 +71,9 @@ def valid_hmac_signature?(secret_key, signature) # Deserializes a loaded response # @param response_class [Parsing::V2::CommonResponse] class to return. - # @return [Parsing::V2::JobResponse, Parsing::V2::InferenceResponse] + # @return [Parsing::V2::JobResponse, Mindee::V2::Parsing::CommonResponse] def deserialize_response(response_class) - response_class.new(as_hash) # : Parsing::V2::JobResponse | Parsing::V2::InferenceResponse + response_class.new(as_hash) # : Mindee::Parsing::V2::JobResponse | Mindee::Parsing::V2::InferenceResponse rescue StandardError raise Errors::MindeeInputError, 'Invalid response provided.' end diff --git a/lib/mindee/parsing/v2/field/field_location.rb b/lib/mindee/parsing/v2/field/field_location.rb index e6902988..d5869b57 100644 --- a/lib/mindee/parsing/v2/field/field_location.rb +++ b/lib/mindee/parsing/v2/field/field_location.rb @@ -24,7 +24,7 @@ def initialize(server_response) # # @return [String] def to_s - @polygon ? @polygon.to_s : '' + "#{polygon} on page #{page}" end end end diff --git a/lib/mindee/parsing/v2/inference.rb b/lib/mindee/parsing/v2/inference.rb index b462fc51..32801564 100644 --- a/lib/mindee/parsing/v2/inference.rb +++ b/lib/mindee/parsing/v2/inference.rb @@ -5,47 +5,34 @@ require_relative 'inference_file' require_relative 'inference_result' require_relative 'inference_active_options' +require_relative '../../v2/parsing/base_inference' module Mindee module Parsing module V2 # Complete data returned by an inference request. 
- class Inference - # @return [String] Identifier of the inference (when provided by API). - attr_reader :id - # @return [InferenceJob] Metadata about the job. - attr_reader :job - # @return [InferenceModel] Information about the model used. - attr_reader :model - # @return [InferenceFile] Information about the processed file. - attr_reader :file + class Inference < Mindee::V2::Parsing::BaseInference # @return [InferenceActiveOptions] Options which were activated during the inference. attr_reader :active_options # @return [InferenceResult] Result contents. attr_reader :result + @params_type = Input::InferenceParameters + @slug = 'extraction' + @response_type = InferenceResponse + # @param server_response [Hash] Hash representation of the JSON returned by the service. def initialize(server_response) - raise ArgumentError, 'server_response must be a Hash' unless server_response.is_a?(Hash) - - @model = InferenceModel.new(server_response['model']) - @job = InferenceJob.new(server_response['job']) if server_response.key?('job') - @file = InferenceFile.new(server_response['file']) + super @active_options = InferenceActiveOptions.new(server_response['active_options']) @result = InferenceResult.new(server_response['result']) - - @id = server_response['id'] end # String representation. # @return [String] def to_s [ - 'Inference', - '#########', - @job.to_s, - @model.to_s, - @file.to_s, + super, @active_options.to_s, @result.to_s, '', diff --git a/lib/mindee/parsing/v2/inference_response.rb b/lib/mindee/parsing/v2/inference_response.rb index 1aac55a8..7911c33b 100644 --- a/lib/mindee/parsing/v2/inference_response.rb +++ b/lib/mindee/parsing/v2/inference_response.rb @@ -2,18 +2,21 @@ require_relative 'common_response' require_relative 'inference' +require_relative '../../v2/parsing/base_response' module Mindee module Parsing module V2 # HTTP response wrapper that embeds a V2 Inference. 
- class InferenceResponse < CommonResponse + class InferenceResponse < Mindee::V2::Parsing::BaseResponse # @return [Inference] Parsed inference payload. attr_reader :inference + @slug = 'extraction' + @_params_type = Input::InferenceParameters + # @param server_response [Hash] Hash parsed from the API JSON response. def initialize(server_response) - # CommonResponse takes care of the generic metadata (status, etc.) super @inference = Inference.new(server_response['inference']) diff --git a/lib/mindee/v2.rb b/lib/mindee/v2.rb new file mode 100644 index 00000000..6950e5f9 --- /dev/null +++ b/lib/mindee/v2.rb @@ -0,0 +1,4 @@ +# frozen_string_literal: true + +require_relative 'parsing' +require_relative 'product' diff --git a/lib/mindee/v2/parsing/base_inference.rb b/lib/mindee/v2/parsing/base_inference.rb new file mode 100644 index 00000000..31cbe87c --- /dev/null +++ b/lib/mindee/v2/parsing/base_inference.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: true + +require_relative '../product/base_product' +require_relative '../../parsing/v2/inference_response' + +module Mindee + module V2 + module Parsing + # Base class for V2 inference responses. + class BaseInference < Mindee::V2::Product::BaseProduct + # @return [InferenceJob] Metadata about the job. + attr_reader :job + # @return [Parsing::V2::InferenceModel] Model info for the inference. + attr_reader :model + # @return [Parsing::V2::InferenceFile] File info for the inference. + attr_reader :file + # @return [String] ID of the inference. + attr_reader :id + + def initialize(http_response) + raise ArgumentError, 'Server response must be a Hash' unless http_response.is_a?(Hash) + + super() + @model = Mindee::Parsing::V2::InferenceModel.new(http_response['model']) + @file = Mindee::Parsing::V2::InferenceFile.new(http_response['file']) + @id = http_response['id'] + @job = Mindee::Parsing::V2::InferenceJob.new(http_response['job']) if http_response.key?('job') + end + + # String representation. 
+ # @return [String] + def to_s + [ + 'Inference', + '#########', + @job.to_s, + @model.to_s, + @file.to_s, + ].join("\n") + end + end + end + end +end diff --git a/lib/mindee/v2/parsing/base_response.rb b/lib/mindee/v2/parsing/base_response.rb new file mode 100644 index 00000000..a604e156 --- /dev/null +++ b/lib/mindee/v2/parsing/base_response.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +module Mindee + module V2 + module Parsing + # Base class for V2 inference responses. + class BaseResponse < Mindee::Parsing::V2::CommonResponse + # @return [BaseInference] The inference result for a split utility request + attr_reader :inference + end + end + end +end diff --git a/lib/mindee/v2/product.rb b/lib/mindee/v2/product.rb new file mode 100644 index 00000000..bc409f7e --- /dev/null +++ b/lib/mindee/v2/product.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +require_relative 'product/classification/classification' +require_relative 'product/crop/crop' +require_relative 'product/extraction/extraction' +require_relative 'product/ocr/ocr' +require_relative 'product/split/split' diff --git a/lib/mindee/v2/product/base_product.rb b/lib/mindee/v2/product/base_product.rb new file mode 100644 index 00000000..a11e0eb1 --- /dev/null +++ b/lib/mindee/v2/product/base_product.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +require_relative '../parsing/base_response' + +module Mindee + module V2 + module Product + # Base class for all V2 products. + class BaseProduct + # @return [String] The slug of the endpoint used for this response + @slug = '' + # @return [Class] The class of the parameters used for this response + @params_type = Mindee::Input::BaseParameters + # @return [Class] The class of the response used for this product + @response_type = Mindee::V2::Parsing::BaseResponse + + def initialize + raise StandardError, 'Cannot instantiate abstract class.' 
if instance_of?(BaseProduct) + end + + class << self + attr_reader :params_type, :slug, :response_type + end + end + end + end +end diff --git a/lib/mindee/v2/product/classification/classification.rb b/lib/mindee/v2/product/classification/classification.rb new file mode 100644 index 00000000..0eff57a8 --- /dev/null +++ b/lib/mindee/v2/product/classification/classification.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +require_relative 'classification_response' +require_relative 'params/classification_parameters' + +module Mindee + module V2 + module Product + module Classification + # Classification product. + class Classification < BaseProduct + @slug = 'classification' + @params_type = Mindee::V2::Product::Classification::Params::ClassificationParameters + @response_type = Mindee::V2::Product::Classification::ClassificationResponse + end + end + end + end +end diff --git a/lib/mindee/v2/product/classification/classification_classifier.rb b/lib/mindee/v2/product/classification/classification_classifier.rb new file mode 100644 index 00000000..288ee311 --- /dev/null +++ b/lib/mindee/v2/product/classification/classification_classifier.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +module Mindee + module V2 + module Product + module Classification + # Classification of document type from the source file. + class ClassificationClassifier + # @return [String] The document type, as identified on given classification values. + attr_reader :document_type + + # @param server_response [Hash] Hash representation of the JSON returned by the service. + def initialize(server_response) + @document_type = server_response['document_type'] + end + + # @return [String] String representation. 
+ def to_s + "Document Type: #{@document_type}" + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/classification/classification_inference.rb b/lib/mindee/v2/product/classification/classification_inference.rb new file mode 100644 index 00000000..837c19c2 --- /dev/null +++ b/lib/mindee/v2/product/classification/classification_inference.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +require_relative 'classification_result' + +module Mindee + module V2 + module Product + module Classification + # The inference result for a classification utility request. + class ClassificationInference < Mindee::V2::Parsing::BaseInference + # @return [ClassificationResult] Parsed inference payload. + attr_reader :result + + # @param server_response [Hash] Hash representation of the JSON returned by the service. + def initialize(server_response) + super + + @result = ClassificationResult.new(server_response['result']) + end + + # String representation. + # @return [String] + def to_s + [ + super, + @result.to_s, + '', + ].join("\n") + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/classification/classification_response.rb b/lib/mindee/v2/product/classification/classification_response.rb new file mode 100644 index 00000000..0b2f7d3e --- /dev/null +++ b/lib/mindee/v2/product/classification/classification_response.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +require_relative '../../parsing/base_response' +require_relative 'classification_inference' +require_relative 'params/classification_parameters' + +module Mindee + module V2 + module Product + module Classification + # HTTP response wrapper that embeds a V2 Inference. + class ClassificationResponse < Mindee::V2::Parsing::BaseResponse + # @return [ClassificationInference] Parsed inference payload. + attr_reader :inference + + # @param server_response [Hash] Hash parsed from the API JSON response. 
+ def initialize(server_response) + super + + @inference = ClassificationInference.new(server_response['inference']) + end + + # String representation. + # @return [String] + def to_s + @inference.to_s + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/classification/classification_result.rb b/lib/mindee/v2/product/classification/classification_result.rb new file mode 100644 index 00000000..a994b089 --- /dev/null +++ b/lib/mindee/v2/product/classification/classification_result.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +require_relative 'classification_classifier' + +module Mindee + module V2 + module Product + module Classification + # Result of the document classifier inference. + class ClassificationResult + # @return [ClassificationClassifier] The document type, as identified on given classification values. + attr_reader :classification + + # @param server_response [Hash] Hash representation of the JSON returned by the service. + def initialize(server_response) + @classification = ClassificationClassifier.new(server_response['classification']) + end + + # @return [String] String representation. + def to_s + "Classification\n==============\n#{@classification}" + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/classification/params/classification_parameters.rb b/lib/mindee/v2/product/classification/params/classification_parameters.rb new file mode 100644 index 00000000..398b218e --- /dev/null +++ b/lib/mindee/v2/product/classification/params/classification_parameters.rb @@ -0,0 +1,46 @@ +# frozen_string_literal: true + +module Mindee + module V2 + module Product + module Classification + module Params + # Parameters accepted by the classification utility v2 endpoint. + class ClassificationParameters < Mindee::Input::BaseParameters + # @return [String] Slug for the endpoint. 
+ def self.slug + 'classification' + end + + # @param [String] model_id ID of the model + # @param [String, nil] file_alias File alias, if applicable. + # @param [Array, nil] webhook_ids List of webhook IDs to propagate the API response to. + # @param [Hash, nil] polling_options Options for polling. Set only if having timeout issues. + # @param [Boolean, nil] close_file Whether to close the file after parsing. + def initialize( + model_id, + file_alias: nil, + webhook_ids: nil, + polling_options: nil, + close_file: true + ) + super + end + + # Loads the parameters from a Hash. + # @param [Hash] params Parameters to provide as a hash. + # @return [ClassificationParameters] + def self.from_hash(params: {}) + ClassificationParameters.new( + params.fetch(:model_id), + file_alias: params.fetch(:file_alias, nil), + webhook_ids: params.fetch(:webhook_ids, nil), + close_file: params.fetch(:close_file, true) + ) + end + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/crop/crop.rb b/lib/mindee/v2/product/crop/crop.rb new file mode 100644 index 00000000..24211cfb --- /dev/null +++ b/lib/mindee/v2/product/crop/crop.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +require_relative 'crop_response' +require_relative 'params/crop_parameters' + +module Mindee + module V2 + module Product + module Crop + # Crop product. + class Crop < BaseProduct + @slug = 'crop' + @params_type = Mindee::V2::Product::Crop::Params::CropParameters + @response_type = Mindee::V2::Product::Crop::CropResponse + end + end + end + end +end diff --git a/lib/mindee/v2/product/crop/crop_inference.rb b/lib/mindee/v2/product/crop/crop_inference.rb new file mode 100644 index 00000000..acc7a3fe --- /dev/null +++ b/lib/mindee/v2/product/crop/crop_inference.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +require_relative 'crop_result' + +module Mindee + module V2 + module Product + module Crop + # The inference result for a crop utility request. 
+ class CropInference < Mindee::V2::Parsing::BaseInference + # @return [CropResult] Parsed inference payload. + attr_reader :result + + # @param server_response [Hash] Hash representation of the JSON returned by the service. + def initialize(server_response) + super + + @result = CropResult.new(server_response['result']) + end + + # String representation. + # @return [String] + def to_s + [ + super, + @result.to_s, + '', + ].join("\n") + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/crop/crop_item.rb b/lib/mindee/v2/product/crop/crop_item.rb new file mode 100644 index 00000000..036b8c43 --- /dev/null +++ b/lib/mindee/v2/product/crop/crop_item.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +module Mindee + module V2 + module Product + module Crop + # Result of a cropped document region. + class CropItem + # @return [String] Type or classification of the detected object. + attr_reader :object_type + # @return [Parsing::V2::Field::FieldLocation] Coordinates of the detected object on the document. + attr_reader :location + + # @param server_response [Hash] Hash representation of the JSON returned by the service. + def initialize(server_response) + @object_type = server_response['object_type'] + @location = Mindee::Parsing::V2::Field::FieldLocation.new(server_response['location']) + end + + # String representation. + # @return [String] + def to_s + "* :Location: #{location}\n :Object Type: #{object_type}" + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/crop/crop_response.rb b/lib/mindee/v2/product/crop/crop_response.rb new file mode 100644 index 00000000..90ada724 --- /dev/null +++ b/lib/mindee/v2/product/crop/crop_response.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +require_relative '../../parsing/base_response' +require_relative 'params/crop_parameters' +require_relative 'crop_inference' + +module Mindee + module V2 + module Product + module Crop + # HTTP response wrapper that embeds a V2 Inference. 
+ class CropResponse < Mindee::V2::Parsing::BaseResponse + # @return [CropInference] Parsed inference payload. + attr_reader :inference + + # @param server_response [Hash] Hash parsed from the API JSON response. + def initialize(server_response) + super + + @inference = CropInference.new(server_response['inference']) + end + + # String representation. + # @return [String] + def to_s + @inference.to_s + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/crop/crop_result.rb b/lib/mindee/v2/product/crop/crop_result.rb new file mode 100644 index 00000000..6f572fbe --- /dev/null +++ b/lib/mindee/v2/product/crop/crop_result.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +require_relative 'crop_item' + +module Mindee + module V2 + module Product + module Crop + # Result of a crop utility inference. + class CropResult + # @return [Array] List of results of cropped document regions. + attr_reader :crops + + # @param server_response [Hash] Hash representation of the JSON returned by the service. + def initialize(server_response) + @crops = if server_response.key?('crops') + server_response['crops'].map do |crop| + CropItem.new(crop) + end + end + end + + # String representation. + # @return [String] + def to_s + crops_str = @crops.join("\n") + + "Crops\n=====\n#{crops_str}" + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/crop/params/crop_parameters.rb b/lib/mindee/v2/product/crop/params/crop_parameters.rb new file mode 100644 index 00000000..712fe7ce --- /dev/null +++ b/lib/mindee/v2/product/crop/params/crop_parameters.rb @@ -0,0 +1,46 @@ +# frozen_string_literal: true + +module Mindee + module V2 + module Product + module Crop + module Params + # Parameters accepted by the crop utility v2 endpoint. + class CropParameters < Mindee::Input::BaseParameters + # @return [String] Slug for the endpoint. 
+ def self.slug + 'crop' + end + + # @param [String] model_id ID of the model + # @param [String, nil] file_alias File alias, if applicable. + # @param [Array, nil] webhook_ids List of webhook IDs to propagate the API response to. + # @param [Hash, nil] polling_options Options for polling. Set only if having timeout issues. + # @param [Boolean, nil] close_file Whether to close the file after parsing. + def initialize( + model_id, + file_alias: nil, + webhook_ids: nil, + polling_options: nil, + close_file: true + ) + super + end + + # Loads the parameters from a Hash. + # @param [Hash] params Parameters to provide as a hash. + # @return [CropParameters] + def self.from_hash(params: {}) + CropParameters.new( + params.fetch(:model_id), + file_alias: params.fetch(:file_alias, nil), + webhook_ids: params.fetch(:webhook_ids, nil), + close_file: params.fetch(:close_file, true) + ) + end + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/extraction/extraction.rb b/lib/mindee/v2/product/extraction/extraction.rb new file mode 100644 index 00000000..c01afee3 --- /dev/null +++ b/lib/mindee/v2/product/extraction/extraction.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +require_relative 'extraction_response' +require_relative 'params/extraction_parameters' + +module Mindee + module V2 + module Product + module Extraction + # Extraction product. + # Note: currently a placeholder for the `Inference` class. 
+ class Extraction < Mindee::Parsing::V2::Inference + @params_type = Product::Extraction::Params::ExtractionParameters + @response_type = Product::Extraction::ExtractionResponse + end + end + end + end +end diff --git a/lib/mindee/v2/product/extraction/extraction_inference.rb b/lib/mindee/v2/product/extraction/extraction_inference.rb new file mode 100644 index 00000000..08f66d17 --- /dev/null +++ b/lib/mindee/v2/product/extraction/extraction_inference.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +require_relative '../../../parsing/v2/inference' +require_relative 'extraction_result' + +module Mindee + module V2 + module Product + module Extraction + # Extraction inference. + class ExtractionInference < Mindee::Parsing::V2::Inference + # @return [ExtractionResult] Parsed inference payload. + attr_reader :result + + # @param server_response [Hash] Hash representation of the JSON returned by the service. + def initialize(server_response) + super + + @result = Mindee::V2::Product::Extraction::ExtractionResult.new(server_response['result']) + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/extraction/extraction_response.rb b/lib/mindee/v2/product/extraction/extraction_response.rb new file mode 100644 index 00000000..5b9f29b7 --- /dev/null +++ b/lib/mindee/v2/product/extraction/extraction_response.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +require_relative '../../../parsing/v2/inference_response' +require_relative 'extraction_inference' + +module Mindee + module V2 + module Product + module Extraction + # HTTP response wrapper that embeds a V2 Inference. + class ExtractionResponse < Mindee::Parsing::V2::InferenceResponse + # @return [ExtractionInference] Parsed inference payload. 
+ attr_reader :inference + + def initialize(server_response) + super + @inference = Mindee::V2::Product::Extraction::ExtractionInference.new(server_response['inference']) + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/extraction/extraction_result.rb b/lib/mindee/v2/product/extraction/extraction_result.rb new file mode 100644 index 00000000..ac080ea3 --- /dev/null +++ b/lib/mindee/v2/product/extraction/extraction_result.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +require_relative '../../../parsing/v2/inference_result' + +module Mindee + module V2 + module Product + module Extraction + # Result of an extraction utility inference. + class ExtractionResult < Mindee::Parsing::V2::InferenceResult + end + end + end + end +end diff --git a/lib/mindee/v2/product/extraction/params/extraction_parameters.rb b/lib/mindee/v2/product/extraction/params/extraction_parameters.rb new file mode 100644 index 00000000..daabbc72 --- /dev/null +++ b/lib/mindee/v2/product/extraction/params/extraction_parameters.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true + +require_relative '../../../../input/inference_parameters' + +module Mindee + module V2 + module Product + module Extraction + module Params + # Parameters accepted by the extraction v2 endpoint. + # Currently a placeholder for the InferenceParameters class. + class ExtractionParameters < Input::InferenceParameters + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/ocr/ocr.rb b/lib/mindee/v2/product/ocr/ocr.rb new file mode 100644 index 00000000..50ff9e80 --- /dev/null +++ b/lib/mindee/v2/product/ocr/ocr.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +require_relative 'ocr_response' +require_relative 'params/ocr_parameters' + +module Mindee + module V2 + module Product + module Ocr + # Ocr product. 
+ class Ocr < BaseProduct + @slug = 'ocr' + @params_type = Mindee::V2::Product::Ocr::Params::OcrParameters + @response_type = Mindee::V2::Product::Ocr::OcrResponse + end + end + end + end +end diff --git a/lib/mindee/v2/product/ocr/ocr_inference.rb b/lib/mindee/v2/product/ocr/ocr_inference.rb new file mode 100644 index 00000000..5daeb062 --- /dev/null +++ b/lib/mindee/v2/product/ocr/ocr_inference.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +require_relative 'ocr_result' + +module Mindee + module V2 + module Product + module Ocr + # The inference result for an OCR utility request. + class OcrInference < Mindee::V2::Parsing::BaseInference + # @return [OcrResult] Parsed inference payload. + attr_reader :result + + # @param server_response [Hash] Hash representation of the JSON returned by the service. + def initialize(server_response) + super + + @result = OcrResult.new(server_response['result']) + end + + # String representation. + # @return [String] + def to_s + [ + super, + @result.to_s, + '', + ].join("\n") + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/ocr/ocr_page.rb b/lib/mindee/v2/product/ocr/ocr_page.rb new file mode 100644 index 00000000..5de39688 --- /dev/null +++ b/lib/mindee/v2/product/ocr/ocr_page.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +require_relative 'ocr_word' + +module Mindee + module V2 + module Product + module Ocr + # OCR result for a single page. + class OcrPage + # @return [Array] List of words extracted from the document page. + attr_reader :words + # @return [String] Full text content extracted from the document page. + attr_reader :content + + # @param server_response [Hash] Hash representation of the JSON returned by the service. + def initialize(server_response) + @words = server_response['words'].map { |word| OcrWord.new(word) } + @content = server_response['content'] + end + + # String representation. 
+ # @return [String] + def to_s + ocr_words = "\n" + ocr_words += @words.map(&:to_s).join("\n\n") if @words&.any? + "OCR Words\n======#{ocr_words}\n\n:Content: #{@content}" + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/ocr/ocr_response.rb b/lib/mindee/v2/product/ocr/ocr_response.rb new file mode 100644 index 00000000..3bac4d1e --- /dev/null +++ b/lib/mindee/v2/product/ocr/ocr_response.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +require_relative '../../parsing/base_response' +require_relative 'params/ocr_parameters' +require_relative 'ocr_inference' + +module Mindee + module V2 + module Product + module Ocr + # HTTP response wrapper that embeds a V2 Inference. + class OcrResponse < Mindee::V2::Parsing::BaseResponse + # @return [OcrInference] Parsed inference payload. + attr_reader :inference + + # @param server_response [Hash] Hash parsed from the API JSON response. + def initialize(server_response) + super + + @inference = OcrInference.new(server_response['inference']) + end + + # String representation. + # @return [String] + def to_s + @inference.to_s + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/ocr/ocr_result.rb b/lib/mindee/v2/product/ocr/ocr_result.rb new file mode 100644 index 00000000..093a6ae9 --- /dev/null +++ b/lib/mindee/v2/product/ocr/ocr_result.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +require_relative 'ocr_page' + +module Mindee + module V2 + module Product + module Ocr + # Result of a ocr utility inference. + class OcrResult + # @return [Array] List of OCR results for each page in the document. + attr_reader :pages + + # @param server_response [Hash] Hash representation of the JSON returned by the service. + def initialize(server_response) + @pages = if server_response.key?('pages') + server_response['pages'].map do |pages| + OcrPage.new(pages) + end + end + end + + # String representation. 
+ # @return [String] + def to_s + pages_str = @pages.join("\n") + + "Pages\n======\n#{pages_str}" + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/ocr/ocr_word.rb b/lib/mindee/v2/product/ocr/ocr_word.rb new file mode 100644 index 00000000..489c7dc8 --- /dev/null +++ b/lib/mindee/v2/product/ocr/ocr_word.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +module Mindee + module V2 + module Product + module Ocr + # OCR result for a single word extracted from the document page. + class OcrWord + # @return [String] Text content of the word. + attr_reader :content + # @return [Mindee::Geometry::Polygon] Position information as a list of points in clockwise order. + attr_reader :polygon + + # @param server_response [Hash] Hash representation of the JSON returned by the service. + def initialize(server_response) + @content = server_response['content'] + @polygon = Mindee::Geometry::Polygon.new(server_response['polygon']) + end + + # String representation. + # @return [String] + def to_s + @content + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/ocr/params/ocr_parameters.rb b/lib/mindee/v2/product/ocr/params/ocr_parameters.rb new file mode 100644 index 00000000..ca068c83 --- /dev/null +++ b/lib/mindee/v2/product/ocr/params/ocr_parameters.rb @@ -0,0 +1,46 @@ +# frozen_string_literal: true + +module Mindee + module V2 + module Product + module Ocr + module Params + # Parameters accepted by the ocr utility v2 endpoint. + class OcrParameters < Mindee::Input::BaseParameters + # @return [String] Slug for the endpoint. + def self.slug + 'ocr' + end + + # @param [String] model_id ID of the model + # @param [String, nil] file_alias File alias, if applicable. + # @param [Array, nil] webhook_ids List of webhook IDs to propagate the API response to. + # @param [Hash, nil] polling_options Options for polling. Set only if having timeout issues. + # @param [Boolean, nil] close_file Whether to close the file after parsing. 
+ def initialize( + model_id, + file_alias: nil, + webhook_ids: nil, + polling_options: nil, + close_file: true + ) + super + end + + # Loads the parameters from a Hash. + # @param [Hash] params Parameters to provide as a hash. + # @return [OcrParameters] + def self.from_hash(params: {}) + OcrParameters.new( + params.fetch(:model_id), + file_alias: params.fetch(:file_alias, nil), + webhook_ids: params.fetch(:webhook_ids, nil), + close_file: params.fetch(:close_file, true) + ) + end + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/split/params/split_parameters.rb b/lib/mindee/v2/product/split/params/split_parameters.rb new file mode 100644 index 00000000..57dd79b8 --- /dev/null +++ b/lib/mindee/v2/product/split/params/split_parameters.rb @@ -0,0 +1,46 @@ +# frozen_string_literal: true + +module Mindee + module V2 + module Product + module Split + module Params + # Parameters accepted by the split utility v2 endpoint. + class SplitParameters < Mindee::Input::BaseParameters + # @return [String] Slug for the endpoint. + def self.slug + 'split' + end + + # @param [String] model_id ID of the model + # @param [String, nil] file_alias File alias, if applicable. + # @param [Array, nil] webhook_ids List of webhook IDs to propagate the API response to. + # @param [Hash, nil] polling_options Options for polling. Set only if having timeout issues. + # @param [Boolean, nil] close_file Whether to close the file after parsing. + def initialize( + model_id, + file_alias: nil, + webhook_ids: nil, + polling_options: nil, + close_file: true + ) + super + end + + # Loads the parameters from a Hash. + # @param [Hash] params Parameters to provide as a hash. 
+ # @return [SplitParameters] + def self.from_hash(params: {}) + SplitParameters.new( + params.fetch(:model_id), + file_alias: params.fetch(:file_alias, nil), + webhook_ids: params.fetch(:webhook_ids, nil), + close_file: params.fetch(:close_file, true) + ) + end + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/split/split.rb b/lib/mindee/v2/product/split/split.rb new file mode 100644 index 00000000..40318cea --- /dev/null +++ b/lib/mindee/v2/product/split/split.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +require_relative 'split_response' +require_relative 'params/split_parameters' + +module Mindee + module V2 + module Product + module Split + # Split product. + class Split < BaseProduct + @slug = 'split' + @params_type = Mindee::V2::Product::Split::Params::SplitParameters + @response_type = Mindee::V2::Product::Split::SplitResponse + end + end + end + end +end diff --git a/lib/mindee/v2/product/split/split_inference.rb b/lib/mindee/v2/product/split/split_inference.rb new file mode 100644 index 00000000..4401c9b9 --- /dev/null +++ b/lib/mindee/v2/product/split/split_inference.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +require_relative 'split_result' + +module Mindee + module V2 + module Product + module Split + # Split inference result. + class SplitInference < Mindee::V2::Parsing::BaseInference + # @return [SplitResult] Result of a split inference. + attr_reader :result + + # @param server_response [Hash] Hash representation of the JSON returned by the service. + def initialize(server_response) + super + + @result = SplitResult.new(server_response['result']) + end + + # String representation. 
+ # @return [String] + def to_s + [ + super, + @result.to_s, + '', + ].join("\n") + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/split/split_range.rb b/lib/mindee/v2/product/split/split_range.rb new file mode 100644 index 00000000..6eaabd12 --- /dev/null +++ b/lib/mindee/v2/product/split/split_range.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +module Mindee + module V2 + module Product + module Split + # A single split: a page range and the document type assigned to it. + class SplitRange + # @return [Array] 0-based page indexes, where the first integer indicates the start page and the + # second integer indicates the end page. + attr_reader :page_range + # @return [String] The document type, as identified on given classification values. + attr_reader :document_type + + # @param server_response [Hash] Hash representation of the JSON returned by the service. + def initialize(server_response) + @page_range = server_response['page_range'] + @document_type = server_response['document_type'] + end + + # String representation. + # @return [String] + def to_s + "* :Page Range: #{@page_range}\n :Document Type: #{@document_type}" + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/split/split_response.rb b/lib/mindee/v2/product/split/split_response.rb new file mode 100644 index 00000000..9fdafae1 --- /dev/null +++ b/lib/mindee/v2/product/split/split_response.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +require_relative '../../parsing/base_response' +require_relative 'params/split_parameters' +require_relative 'split_inference' + +module Mindee + module V2 + module Product + module Split + # HTTP response wrapper that embeds a V2 Inference. + class SplitResponse < Mindee::V2::Parsing::BaseResponse + # @return [SplitInference] Parsed inference payload. + attr_reader :inference + + # @param server_response [Hash] Hash parsed from the API JSON response.
+ def initialize(server_response) + super + + @inference = SplitInference.new(server_response['inference']) + end + + # String representation. + # @return [String] + def to_s + @inference.to_s + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/split/split_result.rb b/lib/mindee/v2/product/split/split_result.rb new file mode 100644 index 00000000..b11d284b --- /dev/null +++ b/lib/mindee/v2/product/split/split_result.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +require_relative 'split_range' + +module Mindee + module V2 + module Product + module Split + # Result of a split utility inference. + class SplitResult + # @return [Array] List of results of split document regions. + attr_reader :splits + + # @param server_response [Hash] Hash representation of the JSON returned by the service. + def initialize(server_response) + @splits = if server_response.key?('splits') + server_response['splits'].map do |split| + SplitRange.new(split) + end + end + end + + # String representation.
+ # @return [String] + def to_s + splits_str = @splits.join("\n") + + "Splits\n======\n#{splits_str}" + end + end + end + end + end +end diff --git a/sig/mindee/client_v2.rbs b/sig/mindee/client_v2.rbs index d33cdbb5..66b5409c 100644 --- a/sig/mindee/client_v2.rbs +++ b/sig/mindee/client_v2.rbs @@ -3,15 +3,19 @@ OTS_OWNER: String module Mindee class ClientV2 - attr_reader mindee_api: HTTP::MindeeApiV2 + private attr_reader mindee_api: HTTP::MindeeApiV2 def logger: () -> Logger def initialize: (?api_key: String) -> void - def get_inference: (String) -> Parsing::V2::InferenceResponse - def get_job: (String) -> Parsing::V2::JobResponse - def enqueue_inference: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | Input::InferenceParameters) -> Parsing::V2::JobResponse - def enqueue_and_get_inference: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | Input::InferenceParameters) -> Parsing::V2::InferenceResponse - def validate_async_params: (Integer | Float, Integer | Float, Integer) -> void - def normalize_inference_parameters: (Hash[String | Symbol, untyped] | Input::InferenceParameters) -> Input::InferenceParameters + def get_inference: (String inference_id) -> Parsing::V2::InferenceResponse + def get_result: [T] (HTTP::_ProductClass[T] product, String resource) -> T + def get_result_url: [T] (HTTP::_ResponseFactory[T] response_class, String inference_id) -> T + def get_job: (String job_id) -> Parsing::V2::JobResponse + def enqueue_inference: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | Input::InferenceParameters params, ?disable_redundant_warnings: bool) -> Parsing::V2::JobResponse + def enqueue: [T] (HTTP::_ProductClass[T] product, Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | Input::BaseParameters params) -> Parsing::V2::JobResponse + def enqueue_and_get_result: [T] 
(HTTP::_ProductClass[T] product, Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | Input::BaseParameters params) -> T + def enqueue_and_get_inference: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | Input::InferenceParameters params, ?disable_redundant_warnings: bool) -> Parsing::V2::InferenceResponse + def validate_async_params: (Integer | Float, Integer | Float, Integer) -> void + def normalize_parameters: (singleton(Input::BaseParameters) param_class, Hash[String | Symbol, untyped] | Input::BaseParameters params) -> Input::BaseParameters end end diff --git a/sig/mindee/errors/mindee_http_error_v2.rbs b/sig/mindee/errors/mindee_http_error_v2.rbs index b6b3d924..02e403a1 100644 --- a/sig/mindee/errors/mindee_http_error_v2.rbs +++ b/sig/mindee/errors/mindee_http_error_v2.rbs @@ -9,7 +9,7 @@ module Mindee attr_reader title: String attr_reader errors: Array[Parsing::V2::ErrorItem] - def initialize: (Hash[String, untyped] | Parsing::V2::ErrorResponse) -> void + def initialize: (Hash[String, untyped] | Mindee::Parsing::V2::ErrorResponse) -> void end end end diff --git a/sig/mindee/geometry/point.rbs b/sig/mindee/geometry/point.rbs index 9bac0dcf..f2cc620d 100644 --- a/sig/mindee/geometry/point.rbs +++ b/sig/mindee/geometry/point.rbs @@ -7,6 +7,8 @@ module Mindee def initialize: (Float, Float) -> void def []: (Integer) -> Float + + def to_s: -> String end end end diff --git a/sig/mindee/geometry/polygon.rbs b/sig/mindee/geometry/polygon.rbs index 19f8b7c5..23a42d5d 100644 --- a/sig/mindee/geometry/polygon.rbs +++ b/sig/mindee/geometry/polygon.rbs @@ -5,6 +5,8 @@ module Mindee def initialize: (Array[::Mindee::Geometry::Point | Array[Float]]) -> void def centroid: -> Point def point_in_y?: (Point) -> bool + + def to_s: -> String end end end diff --git a/sig/mindee/http/mindee_api_v2.rbs b/sig/mindee/http/mindee_api_v2.rbs index 80f6db4f..c67b09b3 100644 --- 
a/sig/mindee/http/mindee_api_v2.rbs +++ b/sig/mindee/http/mindee_api_v2.rbs @@ -1,21 +1,37 @@ -# lib/mindee/http/mindee_api_v2.rb +# lib/mindee/http/mindee_api_v2.rbs module Mindee module HTTP + interface _ResponseFactory[T] + def new: (Hash[String | Symbol, untyped]) -> T + end + + interface _ProductClass[T] + def slug: () -> String + def response_type: () -> _ResponseFactory[T] + def params_type: () -> singleton(Input::BaseParameters) + end + class MindeeApiV2 attr_reader settings: ApiSettingsV2 def initialize: (?api_key: String?) -> void - def req_post_inference_enqueue: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Input::InferenceParameters) -> Parsing::V2::JobResponse + + def req_get_result: [T] (_ProductClass[T] product, String resource) -> T + + def req_post_enqueue: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Input::BaseParameters) -> Parsing::V2::JobResponse def req_get_inference: (String) -> Parsing::V2::InferenceResponse def req_get_job: (String) -> Parsing::V2::JobResponse def process_response: (Net::HTTPResponse?) -> Hash[String | Symbol, untyped] def poll: (String) -> Net::HTTPResponse def inference_job_req_get: (String) -> Net::HTTPResponse def inference_result_req_get: (String) -> Net::HTTPResponse - def enqueue: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Input::InferenceParameters) -> Net::HTTPResponse? + def result_req_get: [T] (String, _ProductClass[T] product) -> Net::HTTPResponse + def enqueue: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Input::BaseParameters) -> Net::HTTPResponse? 
private - + def req_get_job_url: (String) -> Parsing::V2::JobResponse + def req_get_result_url: [T] (_ResponseFactory[T] result_class, String url) -> T + def uri?: (String) -> bool def enqueue_form_options: (Array[untyped], Input::InferenceParameters) -> Array[untyped] end end diff --git a/sig/mindee/input/base_parameters.rbs b/sig/mindee/input/base_parameters.rbs new file mode 100644 index 00000000..abccc144 --- /dev/null +++ b/sig/mindee/input/base_parameters.rbs @@ -0,0 +1,35 @@ +# lib/mindee/input/base_parameters.rb + +module Mindee + module Input + class BaseParameters + attr_reader self.slug: String + + def self.from_hash: (params: Hash[String | Symbol, untyped]) -> instance + def self.load_from_hash: (params: Hash[String | Symbol, untyped]) -> Hash[String | Symbol, untyped] + + def slug: -> String + + attr_reader model_id: String + attr_reader file_alias: String? + attr_reader webhook_ids: Array[String]? + attr_reader polling_options: PollingOptions + attr_reader close_file: bool + + def initialize: ( + String, + ?file_alias: String?, + ?webhook_ids: Array[String]?, + ?polling_options: Hash[Symbol | String, untyped] | PollingOptions?, + ?close_file: bool? + ) -> void + + def append_form_data: (Array[Array[untyped]]) -> Array[Array[untyped]] + def validate_async_params: () -> void + + private + + def get_clean_polling_options: (Hash[String | Symbol, untyped] | PollingOptions?) -> PollingOptions + end + end +end diff --git a/sig/mindee/input/inference_parameters.rbs b/sig/mindee/input/inference_parameters.rbs index 1e929c5f..b88d8ef2 100644 --- a/sig/mindee/input/inference_parameters.rbs +++ b/sig/mindee/input/inference_parameters.rbs @@ -1,17 +1,14 @@ # lib/mindee/input/inference_parameters.rb module Mindee module Input - class InferenceParameters - attr_reader close_file: bool + class InferenceParameters < BaseParameters + def self.slug: -> String + attr_reader confidence: bool? - attr_reader file_alias: String? 
- attr_reader model_id: String - attr_reader polling_options: PollingOptions attr_reader polygon: bool? attr_reader rag: bool? attr_reader raw_text: bool? attr_reader text_context: String? - attr_reader webhook_ids: Array[String]? attr_reader data_schema: DataSchema? def initialize: ( @@ -29,12 +26,7 @@ module Mindee ) -> void def self.from_hash: (params: Hash[String | Symbol, untyped]) -> InferenceParameters - - def validate_async_params: -> void - - private - - def get_clean_polling_options: (Hash[String | Symbol, untyped] | PollingOptions?) -> PollingOptions + def append_form_data: (Array[Array[untyped]]) -> Array[Array[untyped]] end end end diff --git a/sig/mindee/input/local_response.rbs b/sig/mindee/input/local_response.rbs index fd28bd5f..4f0468fb 100644 --- a/sig/mindee/input/local_response.rbs +++ b/sig/mindee/input/local_response.rbs @@ -2,13 +2,13 @@ module Mindee module Input class LocalResponse - def file: -> StringIO + attr_reader file: StringIO def initialize: (File | IO | StringIO | String | Pathname | Tempfile) -> void def as_hash: -> Hash[String | Symbol, untyped] def self.process_secret_key: (String) -> String def get_hmac_signature: (String) -> String def valid_hmac_signature?: (String, String) -> bool - def deserialize_response: (singleton(Parsing::V2::CommonResponse))-> (Parsing::V2::JobResponse | Parsing::V2::InferenceResponse) + def deserialize_response: (singleton(Parsing::V2::CommonResponse))-> (Parsing::V2::JobResponse | Mindee::Parsing::V2::InferenceResponse) end end end diff --git a/sig/mindee/parsing/v2/inference.rbs b/sig/mindee/parsing/v2/inference.rbs index cbdc794c..b6584c95 100644 --- a/sig/mindee/parsing/v2/inference.rbs +++ b/sig/mindee/parsing/v2/inference.rbs @@ -2,12 +2,11 @@ module Mindee module Parsing module V2 - class Inference + class Inference < Mindee::V2::Parsing::BaseInference + attr_reader self.params_type: singleton(Input::InferenceParameters) + attr_reader self.response_type: singleton(InferenceResponse) + 
attr_reader self.slug: String - attr_reader id: String - attr_reader job: InferenceJob - attr_reader model: InferenceModel - attr_reader file: InferenceFile attr_reader active_options: InferenceActiveOptions attr_reader result: InferenceResult diff --git a/sig/mindee/parsing/v2/inference_response.rbs b/sig/mindee/parsing/v2/inference_response.rbs index 7254f996..05138748 100644 --- a/sig/mindee/parsing/v2/inference_response.rbs +++ b/sig/mindee/parsing/v2/inference_response.rbs @@ -2,10 +2,19 @@ module Mindee module Parsing module V2 - class InferenceResponse < CommonResponse - attr_reader inference: V2::Inference + class InferenceResponse < Mindee::V2::Parsing::BaseResponse[Mindee::Parsing::V2::Inference] + + self.@slug: String + self.@_params_type: singleton(Input::BaseParameters) + + attr_reader inference: Mindee::Parsing::V2::Inference def initialize: (Hash[String | Symbol, untyped]) -> void + + def _params_type: -> singleton(Input::InferenceParameters) + def to_s: -> String + def self._params_type: () -> singleton(Input::InferenceParameters) + def self.slug: () -> String end end end diff --git a/sig/mindee/v2/parsing/base_inference.rbs b/sig/mindee/v2/parsing/base_inference.rbs new file mode 100644 index 00000000..4f6b0396 --- /dev/null +++ b/sig/mindee/v2/parsing/base_inference.rbs @@ -0,0 +1,18 @@ +# lib/mindee/v2/parsing/base_inference.rb + +module Mindee + module V2 + module Parsing + class BaseInference + attr_reader job: Mindee::Parsing::V2::InferenceJob + attr_reader file: Mindee::Parsing::V2::InferenceFile + attr_reader id: String + attr_reader model: Mindee::Parsing::V2::InferenceModel + + def initialize: (Hash[String | Symbol, untyped]) -> void + + def to_s: -> String + end + end + end +end diff --git a/sig/mindee/v2/parsing/base_response.rbs b/sig/mindee/v2/parsing/base_response.rbs new file mode 100644 index 00000000..8d1c7814 --- /dev/null +++ b/sig/mindee/v2/parsing/base_response.rbs @@ -0,0 +1,11 @@ +# lib/mindee/v2/parsing/base_response.rb + 
+module Mindee + module V2 + module Parsing + class BaseResponse[T] < Mindee::Parsing::V2::CommonResponse + attr_reader inference: T + end + end + end +end diff --git a/sig/mindee/v2/product/base_product.rbs b/sig/mindee/v2/product/base_product.rbs new file mode 100644 index 00000000..aa6bdbe3 --- /dev/null +++ b/sig/mindee/v2/product/base_product.rbs @@ -0,0 +1,19 @@ +module Mindee + module V2 + module Product + class BaseProduct + self.@slug: String + self.@params_type: singleton(Input::BaseParameters) + self.@response_type: singleton(Parsing::BaseResponse) + + def self._params_type: () -> singleton(Input::BaseParameters) + + attr_reader self.slug: String + attr_reader self.params_type: singleton(Input::BaseParameters) + attr_reader self.response_type: singleton(Parsing::BaseResponse) + + def initialize: -> void + end + end + end +end diff --git a/sig/mindee/v2/product/classification/classification.rbs b/sig/mindee/v2/product/classification/classification.rbs new file mode 100644 index 00000000..f8097582 --- /dev/null +++ b/sig/mindee/v2/product/classification/classification.rbs @@ -0,0 +1,10 @@ +module Mindee + module V2 + module Product + module Classification + class Classification < BaseProduct + end + end + end + end +end diff --git a/sig/mindee/v2/product/classification/classification_classifier.rbs b/sig/mindee/v2/product/classification/classification_classifier.rbs new file mode 100644 index 00000000..4a1aafe5 --- /dev/null +++ b/sig/mindee/v2/product/classification/classification_classifier.rbs @@ -0,0 +1,15 @@ +# lib/mindee/v2/product/classification/classification_classifier.rb + +module Mindee + module V2 + module Product + module Classification + class ClassificationClassifier + attr_reader document_type: String + + def initialize: (Hash[String | Symbol, untyped]) -> void + end + end + end + end +end diff --git a/sig/mindee/v2/product/classification/classification_inference.rbs b/sig/mindee/v2/product/classification/classification_inference.rbs new 
file mode 100644 index 00000000..af1ab163 --- /dev/null +++ b/sig/mindee/v2/product/classification/classification_inference.rbs @@ -0,0 +1,15 @@ +module Mindee + module V2 + module Product + module Classification + class ClassificationInference + attr_reader result: ClassificationResult + + def initialize: (Hash[String | Symbol, untyped]) -> void + + def to_s: -> String + end + end + end + end +end diff --git a/sig/mindee/v2/product/classification/classification_response.rbs b/sig/mindee/v2/product/classification/classification_response.rbs new file mode 100644 index 00000000..ee34fbc1 --- /dev/null +++ b/sig/mindee/v2/product/classification/classification_response.rbs @@ -0,0 +1,23 @@ +# lib/mindee/v2/product/classification/classification_response.rb + +module Mindee + module V2 + module Product + module Classification + class ClassificationResponse + self.@slug: String + self.@_params_type: singleton(Params::ClassificationParameters) + + attr_reader inference: Mindee::V2::Product::Classification::ClassificationInference + def initialize: (Hash[String | Symbol, untyped]) -> void + + def _params_type: -> singleton(Params::ClassificationParameters) + + def to_s: -> String + def self._params_type: () -> singleton(Params::ClassificationParameters) + def self.slug: () -> String + end + end + end + end +end diff --git a/sig/mindee/v2/product/classification/classification_result.rbs b/sig/mindee/v2/product/classification/classification_result.rbs new file mode 100644 index 00000000..54dbeec9 --- /dev/null +++ b/sig/mindee/v2/product/classification/classification_result.rbs @@ -0,0 +1,15 @@ +module Mindee + module V2 + module Product + module Classification + class ClassificationResult + attr_reader classification: ClassificationClassifier + + def initialize: (Hash[String | Symbol, untyped]) -> void + + def to_s: -> String + end + end + end + end +end diff --git a/sig/mindee/v2/product/classification/params/classification_parameters/classification_parameters.rbs 
b/sig/mindee/v2/product/classification/params/classification_parameters/classification_parameters.rbs new file mode 100644 index 00000000..57b319bb --- /dev/null +++ b/sig/mindee/v2/product/classification/params/classification_parameters/classification_parameters.rbs @@ -0,0 +1,23 @@ +module Mindee + module V2 + module Product + module Classification + module Params + class ClassificationParameters + def self.slug: -> String + + def self.from_hash: (params: Hash[String | Symbol, untyped]) -> ClassificationParameters + + def initialize: ( + String, + ?file_alias: String?, + ?webhook_ids: Array[String]?, + ?polling_options: Hash[Symbol | String, untyped] | Input::PollingOptions?, + ?close_file: bool? + ) -> void + end + end + end + end + end +end diff --git a/sig/mindee/v2/product/crop/crop.rbs b/sig/mindee/v2/product/crop/crop.rbs new file mode 100644 index 00000000..13774685 --- /dev/null +++ b/sig/mindee/v2/product/crop/crop.rbs @@ -0,0 +1,10 @@ +module Mindee + module V2 + module Product + module Crop + class Crop < BaseProduct + end + end + end + end +end diff --git a/sig/mindee/v2/product/crop/crop_inference.rbs b/sig/mindee/v2/product/crop/crop_inference.rbs new file mode 100644 index 00000000..63ea5b93 --- /dev/null +++ b/sig/mindee/v2/product/crop/crop_inference.rbs @@ -0,0 +1,14 @@ +module Mindee + module V2 + module Product + module Crop + class CropInference + attr_reader result: CropResult + + def initialize: (Hash[String | Symbol, untyped]) -> void + def to_s: -> String + end + end + end + end +end diff --git a/sig/mindee/v2/product/crop/crop_item.rbs b/sig/mindee/v2/product/crop/crop_item.rbs new file mode 100644 index 00000000..1e3e7998 --- /dev/null +++ b/sig/mindee/v2/product/crop/crop_item.rbs @@ -0,0 +1,15 @@ +module Mindee + module V2 + module Product + module Crop + class CropItem + attr_reader object_type: String + attr_reader location: Mindee::Parsing::V2::Field::FieldLocation + + def initialize: (Hash[String | Symbol, untyped]) -> void + def 
to_s: -> String + end + end + end + end +end diff --git a/sig/mindee/v2/product/crop/crop_response.rbs b/sig/mindee/v2/product/crop/crop_response.rbs new file mode 100644 index 00000000..4b0c638f --- /dev/null +++ b/sig/mindee/v2/product/crop/crop_response.rbs @@ -0,0 +1,23 @@ +# lib/mindee/v2/product/crop/crop_response.rb + +module Mindee + module V2 + module Product + module Crop + class CropResponse + self.@slug: String + self.@_params_type: singleton(Params::CropParameters) + + attr_reader inference: Mindee::V2::Product::Crop::CropInference + def initialize: (Hash[String | Symbol, untyped]) -> void + + def _params_type: -> singleton(Params::CropParameters) + + def to_s: -> String + def self._params_type: () -> singleton(Params::CropParameters) + def self.slug: () -> String + end + end + end + end +end diff --git a/sig/mindee/v2/product/crop/crop_result.rbs b/sig/mindee/v2/product/crop/crop_result.rbs new file mode 100644 index 00000000..67cd36e9 --- /dev/null +++ b/sig/mindee/v2/product/crop/crop_result.rbs @@ -0,0 +1,14 @@ +module Mindee + module V2 + module Product + module Crop + class CropResult + attr_reader crops: Array[CropItem] + + def initialize: (Hash[String | Symbol, untyped]) -> void + def to_s: -> String + end + end + end + end +end diff --git a/sig/mindee/v2/product/crop/params/crop_parameters/crop_parameters.rbs b/sig/mindee/v2/product/crop/params/crop_parameters/crop_parameters.rbs new file mode 100644 index 00000000..1aad2a97 --- /dev/null +++ b/sig/mindee/v2/product/crop/params/crop_parameters/crop_parameters.rbs @@ -0,0 +1,23 @@ +module Mindee + module V2 + module Product + module Crop + module Params + class CropParameters + def self.slug: -> String + + def self.from_hash: (params: Hash[String | Symbol, untyped]) -> CropParameters + + def initialize: ( + String, + ?file_alias: String?, + ?webhook_ids: Array[String]?, + ?polling_options: Hash[Symbol | String, untyped] | Input::PollingOptions?, + ?close_file: bool? 
+ ) -> void + end + end + end + end + end +end diff --git a/sig/mindee/v2/product/extraction/extraction.rbs b/sig/mindee/v2/product/extraction/extraction.rbs new file mode 100644 index 00000000..c747c7b8 --- /dev/null +++ b/sig/mindee/v2/product/extraction/extraction.rbs @@ -0,0 +1,12 @@ +module Mindee + module V2 + module Product + module Extraction + class Extraction + self.@params_type: singleton(Mindee::V2::Product::Extraction::Params::ExtractionParameters) + self.@response_type: singleton(Mindee::V2::Product::Extraction::ExtractionResponse) + end + end + end + end +end diff --git a/sig/mindee/v2/product/extraction/extraction_inference.rbs b/sig/mindee/v2/product/extraction/extraction_inference.rbs new file mode 100644 index 00000000..ffefa44b --- /dev/null +++ b/sig/mindee/v2/product/extraction/extraction_inference.rbs @@ -0,0 +1,14 @@ +module Mindee + module V2 + module Product + module Extraction + class ExtractionInference + attr_reader result: ExtractionResult + + def initialize: (Hash[String | Symbol, untyped]) -> void + def to_s: -> String + end + end + end + end +end diff --git a/sig/mindee/v2/product/extraction/extraction_response.rbs b/sig/mindee/v2/product/extraction/extraction_response.rbs new file mode 100644 index 00000000..000d7f82 --- /dev/null +++ b/sig/mindee/v2/product/extraction/extraction_response.rbs @@ -0,0 +1,18 @@ +module Mindee + module V2 + module Product + module Extraction + class ExtractionResponse + self.@_params_type: singleton(Params::ExtractionParameters) + attr_reader inference: Mindee::V2::Product::Extraction::ExtractionInference + + def initialize: (Hash[String | Symbol, untyped]) -> void + + def _params_type: -> singleton(Params::ExtractionParameters) + + def self._params_type: () -> singleton(Params::ExtractionParameters) + end + end + end + end +end diff --git a/sig/mindee/v2/product/extraction/extraction_result.rbs b/sig/mindee/v2/product/extraction/extraction_result.rbs new file mode 100644 index 00000000..975d364c --- 
/dev/null +++ b/sig/mindee/v2/product/extraction/extraction_result.rbs @@ -0,0 +1,10 @@ +module Mindee + module V2 + module Product + module Extraction + class ExtractionResult < Mindee::Parsing::V2::InferenceResult + end + end + end + end +end diff --git a/sig/mindee/v2/product/extraction/params/extraction_parameters.rbs b/sig/mindee/v2/product/extraction/params/extraction_parameters.rbs new file mode 100644 index 00000000..f93bcdae --- /dev/null +++ b/sig/mindee/v2/product/extraction/params/extraction_parameters.rbs @@ -0,0 +1,12 @@ +module Mindee + module V2 + module Product + module Extraction + module Params + class ExtractionParameters < Input::InferenceParameters + end + end + end + end + end +end diff --git a/sig/mindee/v2/product/ocr/ocr.rbs b/sig/mindee/v2/product/ocr/ocr.rbs new file mode 100644 index 00000000..9bbc96d0 --- /dev/null +++ b/sig/mindee/v2/product/ocr/ocr.rbs @@ -0,0 +1,10 @@ +module Mindee + module V2 + module Product + module Ocr + class Ocr < BaseProduct + end + end + end + end +end diff --git a/sig/mindee/v2/product/ocr/ocr_inference.rbs b/sig/mindee/v2/product/ocr/ocr_inference.rbs new file mode 100644 index 00000000..029aa6fb --- /dev/null +++ b/sig/mindee/v2/product/ocr/ocr_inference.rbs @@ -0,0 +1,14 @@ +module Mindee + module V2 + module Product + module Ocr + class OcrInference + attr_reader result: OcrResult + + def initialize: (Hash[String | Symbol, untyped]) -> void + def to_s: -> String + end + end + end + end +end diff --git a/sig/mindee/v2/product/ocr/ocr_page.rbs b/sig/mindee/v2/product/ocr/ocr_page.rbs new file mode 100644 index 00000000..11cd9601 --- /dev/null +++ b/sig/mindee/v2/product/ocr/ocr_page.rbs @@ -0,0 +1,15 @@ +module Mindee + module V2 + module Product + module Ocr + class OcrPage + attr_reader words: Array[OcrWord] + attr_reader content: String + + def initialize: (Hash[String | Symbol, untyped]) -> void + def to_s: -> String + end + end + end + end +end diff --git a/sig/mindee/v2/product/ocr/ocr_response.rbs 
b/sig/mindee/v2/product/ocr/ocr_response.rbs new file mode 100644 index 00000000..609aef0e --- /dev/null +++ b/sig/mindee/v2/product/ocr/ocr_response.rbs @@ -0,0 +1,23 @@ +# lib/mindee/v2/product/ocr/ocr_response.rb + +module Mindee + module V2 + module Product + module Ocr + class OcrResponse + self.@slug: String + self.@_params_type: singleton(Params::OcrParameters) + + attr_reader inference: Mindee::V2::Product::Ocr::OcrInference + def initialize: (Hash[String | Symbol, untyped]) -> void + + def _params_type: -> singleton(Params::OcrParameters) + + def to_s: -> String + def self._params_type: () -> singleton(Params::OcrParameters) + def self.slug: () -> String + end + end + end + end +end diff --git a/sig/mindee/v2/product/ocr/ocr_result.rbs b/sig/mindee/v2/product/ocr/ocr_result.rbs new file mode 100644 index 00000000..c35b0d73 --- /dev/null +++ b/sig/mindee/v2/product/ocr/ocr_result.rbs @@ -0,0 +1,14 @@ +module Mindee + module V2 + module Product + module Ocr + class OcrResult + attr_reader pages: Array[Mindee::V2::Product::Ocr::OcrPage] + + def initialize: (Hash[String | Symbol, untyped]) -> void + def to_s: -> String + end + end + end + end +end diff --git a/sig/mindee/v2/product/ocr/ocr_word.rbs b/sig/mindee/v2/product/ocr/ocr_word.rbs new file mode 100644 index 00000000..2d9967fb --- /dev/null +++ b/sig/mindee/v2/product/ocr/ocr_word.rbs @@ -0,0 +1,15 @@ +module Mindee + module V2 + module Product + module Ocr + class OcrWord + attr_reader content: String + attr_reader polygon: Geometry::Polygon + + def initialize: (Hash[String | Symbol, untyped]) -> void + def to_s: -> String + end + end + end + end +end diff --git a/sig/mindee/v2/product/ocr/params/ocr_parameters/ocr_parameters.rbs b/sig/mindee/v2/product/ocr/params/ocr_parameters/ocr_parameters.rbs new file mode 100644 index 00000000..73172082 --- /dev/null +++ b/sig/mindee/v2/product/ocr/params/ocr_parameters/ocr_parameters.rbs @@ -0,0 +1,24 @@ +module Mindee + module V2 + module Product + module Ocr + 
module Params + class OcrParameters + def self.slug: -> String + + def self.from_hash: (params: Hash[String | Symbol, untyped]) -> OcrParameters + + def initialize: ( + String, + ?file_alias: String?, + ?webhook_ids: Array[String]?, + ?polling_options: Hash[Symbol | String, untyped] | Input::PollingOptions?, + ?close_file: bool? + ) -> void + end + + end + end + end + end +end diff --git a/sig/mindee/v2/product/split/params/split_parameters/split_parameters.rbs b/sig/mindee/v2/product/split/params/split_parameters/split_parameters.rbs new file mode 100644 index 00000000..7011b3ca --- /dev/null +++ b/sig/mindee/v2/product/split/params/split_parameters/split_parameters.rbs @@ -0,0 +1,23 @@ +module Mindee + module V2 + module Product + module Split + module Params + class SplitParameters + def self.slug: -> String + + def self.from_hash: (params: Hash[String | Symbol, untyped]) -> SplitParameters + + def initialize: ( + String, + ?file_alias: String?, + ?webhook_ids: Array[String]?, + ?polling_options: Hash[Symbol | String, untyped] | Input::PollingOptions?, + ?close_file: bool? 
+ ) -> void + end + end + end + end + end +end diff --git a/sig/mindee/v2/product/split/split.rbs b/sig/mindee/v2/product/split/split.rbs new file mode 100644 index 00000000..835a2de1 --- /dev/null +++ b/sig/mindee/v2/product/split/split.rbs @@ -0,0 +1,10 @@ +module Mindee + module V2 + module Product + module Split + class Split < BaseProduct + end + end + end + end +end diff --git a/sig/mindee/v2/product/split/split_inference.rbs b/sig/mindee/v2/product/split/split_inference.rbs new file mode 100644 index 00000000..2cd0c31e --- /dev/null +++ b/sig/mindee/v2/product/split/split_inference.rbs @@ -0,0 +1,14 @@ +module Mindee + module V2 + module Product + module Split + class SplitInference + attr_reader result: SplitResult + + def initialize: (Hash[String | Symbol, untyped]) -> void + def to_s: -> String + end + end + end + end +end diff --git a/sig/mindee/v2/product/split/split_range.rbs b/sig/mindee/v2/product/split/split_range.rbs new file mode 100644 index 00000000..14a390ef --- /dev/null +++ b/sig/mindee/v2/product/split/split_range.rbs @@ -0,0 +1,15 @@ +module Mindee + module V2 + module Product + module Split + class SplitRange + attr_reader page_range: Array[int] + attr_reader document_type: String + + def initialize: (Hash[String | Symbol, untyped]) -> void + def to_s: -> String + end + end + end + end +end diff --git a/sig/mindee/v2/product/split/split_response.rbs b/sig/mindee/v2/product/split/split_response.rbs new file mode 100644 index 00000000..9068eeb5 --- /dev/null +++ b/sig/mindee/v2/product/split/split_response.rbs @@ -0,0 +1,23 @@ +# lib/mindee/v2/product/split/split_response.rb + +module Mindee + module V2 + module Product + module Split + class SplitResponse + self.@slug: String + self.@_params_type: singleton(Params::SplitParameters) + + attr_reader inference: Mindee::V2::Product::Split::SplitInference + def initialize: (Hash[String | Symbol, untyped]) -> void + + def _params_type: -> singleton(Params::SplitParameters) + + def to_s: -> String 
+ def self._params_type: () -> singleton(Params::SplitParameters) + def self.slug: () -> String + end + end + end + end +end diff --git a/sig/mindee/v2/product/split/split_result.rbs b/sig/mindee/v2/product/split/split_result.rbs new file mode 100644 index 00000000..e1be3f6a --- /dev/null +++ b/sig/mindee/v2/product/split/split_result.rbs @@ -0,0 +1,14 @@ +module Mindee + module V2 + module Product + module Split + class SplitResult + attr_reader splits: Array[SplitRange] + + def initialize: (Hash[String | Symbol, untyped]) -> void + def to_s: -> String + end + end + end + end +end diff --git a/spec/data.rb b/spec/data.rb index 5b67c8ca..3550b5de 100644 --- a/spec/data.rb +++ b/spec/data.rb @@ -4,6 +4,7 @@ FILE_TYPES_DIR = File.join(ROOT_DATA_DIR, 'file_types').freeze V1_DATA_DIR = File.join(ROOT_DATA_DIR, 'v1').freeze V2_DATA_DIR = File.join(ROOT_DATA_DIR, 'v2').freeze +V2_PRODUCT_DATA_DIR = File.join(V2_DATA_DIR, 'products').freeze V1_ASYNC_DIR = File.join(V1_DATA_DIR, 'async').freeze V1_PRODUCT_DATA_DIR = File.join(V1_DATA_DIR, 'products').freeze V1_OCR_DIR = File.join(V1_DATA_DIR, 'extras', 'ocr') diff --git a/spec/test_code_samples.sh b/spec/test_code_samples.sh deleted file mode 100755 index 5bce3950..00000000 --- a/spec/test_code_samples.sh +++ /dev/null @@ -1,53 +0,0 @@ -#! 
/bin/sh -set -e - -OUTPUT_FILE='./_test.rb' -ACCOUNT=$1 -ENDPOINT=$2 -API_KEY=$3 -API_KEY_V2=$4 -MODEL_ID=$5 - -if [ -z "${ACCOUNT}" ]; then echo "ACCOUNT is required"; exit 1; fi -if [ -z "${ENDPOINT}" ]; then echo "ENDPOINT is required"; exit 1; fi - -for f in $(find ./docs/code_samples -maxdepth 1 -name "*.txt" -not -name "workflow_*.txt" | sort -h) -do - echo - echo "###############################################" - echo "${f}" - echo "###############################################" - echo - - sed "s/my-api-key/${API_KEY}/" "$f" > $OUTPUT_FILE - sed -i 's/\/path\/to\/the\/file.ext/.\/spec\/data\/file_types\/pdf\/blank_1.pdf/' $OUTPUT_FILE - - if echo "${f}" | grep -q "default_v2.txt" - then - sed -i "s/MY_API_KEY/$API_KEY_V2/" $OUTPUT_FILE - sed -i "s/MY_MODEL_ID/$MODEL_ID/" $OUTPUT_FILE - else - sed -i "s/my-api-key/$API_KEY/" $OUTPUT_FILE - fi - - if echo "$f" | grep -q "custom_v1.txt" - then - sed -i "s/my-account/$ACCOUNT/g" $OUTPUT_FILE - sed -i "s/my-endpoint/$ENDPOINT/g" $OUTPUT_FILE - fi - if echo "${f}" | grep -q "default.txt" - then - sed -i "s/my-account/$ACCOUNT/g" $OUTPUT_FILE - sed -i "s/my-endpoint/$ENDPOINT/g" $OUTPUT_FILE - sed -i "s/my-version/1/g" $OUTPUT_FILE - fi - - if echo "${f}" | grep -q "default_async.txt" - then - sed -i "s/my-account/mindee/" $OUTPUT_FILE - sed -i "s/my-endpoint/invoice_splitter/" $OUTPUT_FILE - sed -i "s/my-version/1/" $OUTPUT_FILE - fi - - bundle exec ruby $OUTPUT_FILE -done diff --git a/spec/test_code_samples_v1.sh b/spec/test_code_samples_v1.sh new file mode 100755 index 00000000..ecf2899e --- /dev/null +++ b/spec/test_code_samples_v1.sh @@ -0,0 +1,40 @@ +#! 
/bin/sh +set -e + +OUTPUT_FILE='./_test_v1.rb' + +if [ -z "${MINDEE_ACCOUNT_SE_TESTS}" ]; then echo "MINDEE_ACCOUNT_SE_TESTS is required"; exit 1; fi +if [ -z "${MINDEE_ENDPOINT_SE_TESTS}" ]; then echo "MINDEE_ENDPOINT_SE_TESTS is required"; exit 1; fi + +for f in $(find ./docs/code_samples -maxdepth 1 -name "*.txt" -not -name "workflow_*.txt" -not -name "v2_*" | sort -h) +do + echo + echo "###############################################" + echo "${f}" + echo "###############################################" + echo + + sed "s/my-api-key/${MINDEE_API_KEY_SE_TESTS}/" "$f" > $OUTPUT_FILE + sed -i 's/\/path\/to\/the\/file.ext/.\/spec\/data\/file_types\/pdf\/blank_1.pdf/' $OUTPUT_FILE + + if echo "$f" | grep -q "custom_v1.txt" + then + sed -i "s/my-account/$MINDEE_ACCOUNT_SE_TESTS/g" $OUTPUT_FILE + sed -i "s/my-endpoint/$MINDEE_ENDPOINT_SE_TESTS/g" $OUTPUT_FILE + fi + if echo "${f}" | grep -q "default.txt" + then + sed -i "s/my-account/$MINDEE_ACCOUNT_SE_TESTS/g" $OUTPUT_FILE + sed -i "s/my-endpoint/$MINDEE_ENDPOINT_SE_TESTS/g" $OUTPUT_FILE + sed -i "s/my-version/1/g" $OUTPUT_FILE + fi + + if echo "${f}" | grep -q "default_async.txt" + then + sed -i "s/my-account/mindee/" $OUTPUT_FILE + sed -i "s/my-endpoint/invoice_splitter/" $OUTPUT_FILE + sed -i "s/my-version/1/" $OUTPUT_FILE + fi + + bundle exec ruby $OUTPUT_FILE +done diff --git a/spec/test_code_samples_v2.sh b/spec/test_code_samples_v2.sh new file mode 100755 index 00000000..16a6e30c --- /dev/null +++ b/spec/test_code_samples_v2.sh @@ -0,0 +1,45 @@ +#! 
/bin/sh +set -e + +OUTPUT_FILE='./_test_v2.rb' + +for f in $(find ./docs/code_samples -maxdepth 1 -name "v2_*.txt" | sort -h) +do + echo + echo "###############################################" + echo "${f}" + echo "###############################################" + echo + + cat "${f}" > $OUTPUT_FILE + sed -i "s/MY_API_KEY/${MINDEE_V2_API_KEY}/" $OUTPUT_FILE + sed -i "s/MY_WEBHOOK_ID/${MINDEE_V2_SE_TESTS_FAILURE_WEBHOOK_ID}/" $OUTPUT_FILE + sed -i 's/\/path\/to\/the\/file.ext/.\/spec\/data\/file_types\/pdf\/blank_1.pdf/' $OUTPUT_FILE + + if echo "${f}" | grep -q "v2_extraction.*.txt" + then + sed -i "s/MY_MODEL_ID/${MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID}/" $OUTPUT_FILE + fi + + if echo "${f}" | grep -q "v2_classification.txt" + then + sed -i "s/MY_MODEL_ID/${MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID}/" $OUTPUT_FILE + fi + + if echo "${f}" | grep -q "v2_crop.txt" + then + sed -i "s/MY_MODEL_ID/${MINDEE_V2_SE_TESTS_CROP_MODEL_ID}/" $OUTPUT_FILE + fi + + if echo "${f}" | grep -q "v2_ocr.txt" + then + sed -i "s/MY_MODEL_ID/${MINDEE_V2_SE_TESTS_OCR_MODEL_ID}/" $OUTPUT_FILE + fi + + if echo "${f}" | grep -q "v2_split.txt" + then + sed -i "s/MY_MODEL_ID/${MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID}/" $OUTPUT_FILE + fi + + bundle exec ruby $OUTPUT_FILE +done diff --git a/spec/v2/client_v2_integration.rb b/spec/v2/client_v2_integration.rb index d5da7f10..598a72b9 100644 --- a/spec/v2/client_v2_integration.rb +++ b/spec/v2/client_v2_integration.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require 'mindee' + describe 'Mindee::ClientV2 – integration tests (V2)', :integration, order: :defined do let(:api_key) { ENV.fetch('MINDEE_V2_API_KEY') } let(:model_id) { ENV.fetch('MINDEE_V2_FINDOC_MODEL_ID') } @@ -219,7 +221,7 @@ context 'A Data Schema Override' do it 'Overrides successfully' do - data_schema_replace = File.read(File.join(V2_DATA_DIR, 'products', 'extraction', + data_schema_replace = File.read(File.join(V2_PRODUCT_DATA_DIR, 'extraction', 'data_schema_replace_param.json')) 
input = Mindee::Input::Source::PathInputSource.new(File.join(FILE_TYPES_DIR, 'pdf', 'blank_1.pdf')) diff --git a/spec/v2/client_v2_spec.rb b/spec/v2/client_v2_spec.rb index dc1b3714..57c3c2fa 100644 --- a/spec/v2/client_v2_spec.rb +++ b/spec/v2/client_v2_spec.rb @@ -4,7 +4,7 @@ require 'mindee' require_relative '../http/mock_http_response' # <- the original helper -RSpec.describe Mindee::ClientV2 do +describe Mindee::ClientV2 do let(:input_doc) { Mindee::Input::Source::PathInputSource.new(File.join(FILE_TYPES_DIR, 'pdf', 'blank.pdf')) } let(:base_url) { 'https://dummy-url' } let(:api_key) { 'dummy-api-key' } diff --git a/spec/v2/input/inference_parameter_spec.rb b/spec/v2/input/inference_parameter_spec.rb index af0ccd6a..bd4ae299 100644 --- a/spec/v2/input/inference_parameter_spec.rb +++ b/spec/v2/input/inference_parameter_spec.rb @@ -5,7 +5,7 @@ describe Mindee::Input::InferenceParameters do let(:extracted_schema_content) do - File.read(File.join(V2_DATA_DIR, 'products', 'extraction', 'data_schema_replace_param.json')) + File.read(File.join(V2_PRODUCT_DATA_DIR, 'extraction', 'data_schema_replace_param.json')) end let(:extracted_schema_hash) { JSON.parse(extracted_schema_content) } let(:extracted_schema_str) { extracted_schema_hash.to_json } diff --git a/spec/v2/input/local_response_v2_spec.rb b/spec/v2/input/local_response_v2_spec.rb index f27a5961..ee5dd549 100644 --- a/spec/v2/input/local_response_v2_spec.rb +++ b/spec/v2/input/local_response_v2_spec.rb @@ -18,7 +18,7 @@ def assert_local_response(local_response) end describe Mindee::Input::LocalResponse do - let(:file_path) { File.join(V2_DATA_DIR, 'products', 'extraction', 'standard_field_types.json') } + let(:file_path) { File.join(V2_PRODUCT_DATA_DIR, 'extraction', 'standard_field_types.json') } context 'A V2 local response' do it 'should load from a path' do response = Mindee::Input::LocalResponse.new(file_path) diff --git a/spec/v2/parsing/inference_spec.rb b/spec/v2/parsing/inference_spec.rb index 
f0c496c6..31f2c2b0 100644 --- a/spec/v2/parsing/inference_spec.rb +++ b/spec/v2/parsing/inference_spec.rb @@ -2,9 +2,9 @@ require 'mindee' -RSpec.describe 'inference' do - let(:findoc_path) { File.join(V2_DATA_DIR, 'products', 'extraction', 'financial_document') } - let(:extraction_path) { File.join(V2_DATA_DIR, 'products', 'extraction') } +describe 'inference' do + let(:findoc_path) { File.join(V2_PRODUCT_DATA_DIR, 'extraction', 'financial_document') } + let(:extraction_path) { File.join(V2_PRODUCT_DATA_DIR, 'extraction') } let(:deep_nested_field_path) { File.join(extraction_path, 'deep_nested_fields.json') } let(:standard_field_path) { File.join(extraction_path, 'standard_field_types.json') } let(:standard_field_rst_path) { File.join(extraction_path, 'standard_field_types.rst') } diff --git a/spec/v2/parsing/job_webhook_spec.rb b/spec/v2/parsing/job_webhook_spec.rb index bf046760..b07e6263 100644 --- a/spec/v2/parsing/job_webhook_spec.rb +++ b/spec/v2/parsing/job_webhook_spec.rb @@ -2,7 +2,7 @@ require 'mindee' -RSpec.describe Mindee::Parsing::V2::JobWebhook do +describe Mindee::Parsing::V2::JobWebhook do describe '#initialize' do context 'when error key is present but value is nil' do it 'does not raise an error and sets @error to nil' do diff --git a/spec/v2/product/classification/classification_integration.rb b/spec/v2/product/classification/classification_integration.rb new file mode 100644 index 00000000..e0c0b326 --- /dev/null +++ b/spec/v2/product/classification/classification_integration.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +require 'mindee' +require 'mindee/v2/product' + +describe Mindee::ClientV2, :integration, :v2 do + let(:classification_model_id) do + ENV.fetch('MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID', nil) + end + + let(:v2_client) do + Mindee::ClientV2.new + end + + it 'processes classification default sample correctly' do + input_source = Mindee::Input::Source::PathInputSource.new( + File.join(V2_PRODUCT_DATA_DIR, 
'classification', 'default_invoice.jpg') + ) + + params = { model_id: classification_model_id } + + response = v2_client.enqueue_and_get_result( + Mindee::V2::Product::Classification::Classification, + input_source, + params + ) + + expect(response.inference).not_to be_nil + expect(response.inference.file.name).to eq('default_invoice.jpg') + expect(response.inference.result.classification).not_to be_nil + expect(response.inference.result.classification.document_type).to eq('invoice') + end +end diff --git a/spec/v2/product/classification/classification_spec.rb b/spec/v2/product/classification/classification_spec.rb new file mode 100644 index 00000000..b27345ae --- /dev/null +++ b/spec/v2/product/classification/classification_spec.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +require 'json' +require 'mindee/v2/product' + +describe Mindee::V2::Product::Classification::Classification, :v2 do + let(:classification_data_dir) { File.join(V2_PRODUCT_DATA_DIR, 'classification') } + + it 'parses a single classification properly' do + json_path = File.join(classification_data_dir, 'classification_single.json') + json_sample = JSON.parse(File.read(json_path)) + + response = Mindee::V2::Product::Classification::ClassificationResponse.new(json_sample) + + expect(response.inference).to be_a(Mindee::V2::Product::Classification::ClassificationInference) + expect(response.inference.result).to be_a(Mindee::V2::Product::Classification::ClassificationResult) + expect( + response.inference.result.classification + ).to be_a(Mindee::V2::Product::Classification::ClassificationClassifier) + + expect(response.inference.result.classification.document_type).to eq('invoice') + end +end diff --git a/spec/v2/product/crop/crop_integration.rb b/spec/v2/product/crop/crop_integration.rb new file mode 100644 index 00000000..f8095c56 --- /dev/null +++ b/spec/v2/product/crop/crop_integration.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +require 'mindee' +require 'mindee/v2/product' + 
+describe Mindee::ClientV2, :integration, :v2 do + let(:crop_model_id) do + ENV.fetch('MINDEE_V2_SE_TESTS_CROP_MODEL_ID', nil) + end + + let(:v2_client) do + Mindee::ClientV2.new + end + + it 'processes crop default sample correctly' do + input_source = Mindee::Input::Source::PathInputSource.new( + File.join(V2_PRODUCT_DATA_DIR, 'crop', 'default_sample.jpg') + ) + + params = { model_id: crop_model_id } + + response = v2_client.enqueue_and_get_result( + Mindee::V2::Product::Crop::Crop, + input_source, + params + ) + + expect(response.inference).not_to be_nil + expect(response.inference.file.name).to eq('default_sample.jpg') + expect(response.inference.result.crops).not_to be_empty + expect(response.inference.result.crops.size).to eq(2) + end +end diff --git a/spec/v2/product/crop/crop_spec.rb b/spec/v2/product/crop/crop_spec.rb new file mode 100644 index 00000000..2ab6e1f5 --- /dev/null +++ b/spec/v2/product/crop/crop_spec.rb @@ -0,0 +1,86 @@ +# frozen_string_literal: true + +require 'json' +require 'mindee/v2/product' + +require_relative '../../../data' + +describe Mindee::V2::Product::Crop::Crop do + let(:crop_data_dir) { File.join(V2_PRODUCT_DATA_DIR, 'crop') } + + it 'parses a single crop properly' do + json_path = File.join(crop_data_dir, 'crop_single.json') + rst_path = File.join(crop_data_dir, 'crop_single.rst') + + json_sample = JSON.parse(File.read(json_path)) + rst_sample = File.read(rst_path) + + response = Mindee::V2::Product::Crop::CropResponse.new(json_sample) + + expect(response.inference).to be_a(Mindee::V2::Product::Crop::CropInference) + expect(response.inference.result.crops).not_to be_empty + + crop = response.inference.result.crops[0] + expect(crop.location.polygon.size).to eq(4) + expect(crop.location.polygon[0][0]).to eq(0.15) + expect(crop.location.polygon[0][1]).to eq(0.254) + expect(crop.location.polygon[1][0]).to eq(0.85) + expect(crop.location.polygon[1][1]).to eq(0.254) + expect(crop.location.polygon[2][0]).to eq(0.85) + 
expect(crop.location.polygon[2][1]).to eq(0.947) + expect(crop.location.polygon[3][0]).to eq(0.15) + expect(crop.location.polygon[3][1]).to eq(0.947) + + expect(crop.location.page).to eq(0) + expect(crop.object_type).to eq('invoice') + + expect(response.to_s).to eq(rst_sample) + end + + it 'parses multiple crops properly' do + json_path = File.join(crop_data_dir, 'crop_multiple.json') + rst_path = File.join(crop_data_dir, 'crop_multiple.rst') + + json_sample = JSON.parse(File.read(json_path)) + rst_sample = File.read(rst_path) + + response = Mindee::V2::Product::Crop::CropResponse.new(json_sample) + + expect(response.inference).to be_a(Mindee::V2::Product::Crop::CropInference) + expect(response.inference.result).to be_a(Mindee::V2::Product::Crop::CropResult) + expect(response.inference.result.crops[0]).to be_a(Mindee::V2::Product::Crop::CropItem) + expect(response.inference.result.crops.size).to eq(2) + + # First Crop assertions + crop_zero = response.inference.result.crops[0] + expect(crop_zero.location.polygon.size).to eq(4) + expect(crop_zero.location.polygon[0][0]).to eq(0.214) + expect(crop_zero.location.polygon[0][1]).to eq(0.079) + expect(crop_zero.location.polygon[1][0]).to eq(0.476) + expect(crop_zero.location.polygon[1][1]).to eq(0.079) + expect(crop_zero.location.polygon[2][0]).to eq(0.476) + expect(crop_zero.location.polygon[2][1]).to eq(0.979) + expect(crop_zero.location.polygon[3][0]).to eq(0.214) + expect(crop_zero.location.polygon[3][1]).to eq(0.979) + + expect(crop_zero.location.page).to eq(0) + expect(crop_zero.object_type).to eq('invoice') + + # Second Crop assertions + crop_one = response.inference.result.crops[1] + expect(crop_one.location.polygon.size).to eq(4) + expect(crop_one.location.polygon[0][0]).to eq(0.547) + expect(crop_one.location.polygon[0][1]).to eq(0.15) + expect(crop_one.location.polygon[1][0]).to eq(0.862) + expect(crop_one.location.polygon[1][1]).to eq(0.15) + expect(crop_one.location.polygon[2][0]).to eq(0.862) + 
expect(crop_one.location.polygon[2][1]).to eq(0.97) + expect(crop_one.location.polygon[3][0]).to eq(0.547) + expect(crop_one.location.polygon[3][1]).to eq(0.97) + + expect(crop_one.location.page).to eq(0) + expect(crop_one.object_type).to eq('invoice') + + expect(response.to_s).to eq(rst_sample) + end +end diff --git a/spec/v2/product/extraction/extraction_spec.rb b/spec/v2/product/extraction/extraction_spec.rb new file mode 100644 index 00000000..352a822f --- /dev/null +++ b/spec/v2/product/extraction/extraction_spec.rb @@ -0,0 +1,389 @@ +# frozen_string_literal: true + +require 'mindee' +require 'mindee/input/local_response' +require 'mindee/v2/product' + +describe 'extraction' do + let(:findoc_path) { File.join(V2_PRODUCT_DATA_DIR, 'extraction', 'financial_document') } + let(:extraction_path) { File.join(V2_PRODUCT_DATA_DIR, 'extraction') } + let(:deep_nested_field_path) { File.join(extraction_path, 'deep_nested_fields.json') } + let(:standard_field_path) { File.join(extraction_path, 'standard_field_types.json') } + let(:standard_field_rst_path) { File.join(extraction_path, 'standard_field_types.rst') } + let(:location_field_path) { File.join(findoc_path, 'complete_with_coordinates.json') } + let(:raw_text_json_path) { File.join(extraction_path, 'raw_texts.json') } + let(:raw_text_str_path) { File.join(extraction_path, 'raw_texts.txt') } + let(:blank_path) { File.join(findoc_path, 'blank.json') } + let(:complete_path) { File.join(findoc_path, 'complete.json') } + let(:rag_matched_path) { File.join(extraction_path, 'rag_matched.json') } + let(:rag_not_matched_path) { File.join(extraction_path, 'rag_not_matched.json') } + let(:text_context_path) { File.join(extraction_path, 'text_context_enabled.json') } + + def load_v2_extraction_inference(resource_path) + local_response = Mindee::Input::LocalResponse.new(resource_path) + local_response.deserialize_response(Mindee::V2::Product::Extraction::ExtractionResponse) + end + + simple_field = 
Mindee::Parsing::V2::Field::SimpleField + object_field = Mindee::Parsing::V2::Field::ObjectField + list_field = Mindee::Parsing::V2::Field::ListField + field_confidence = Mindee::Parsing::V2::Field::FieldConfidence + + describe 'simple' do + it 'loads a blank extraction inference with valid properties' do + response = load_v2_extraction_inference(blank_path) + + fields = response.inference.result.fields + expect(fields).not_to be_empty + expect(fields).to be_a(Mindee::Parsing::V2::Field::InferenceFields) + expect(fields.size).to eq(21) + + expect(fields).to have_key('taxes') + expect(fields['taxes']).not_to be_nil + expect(fields['taxes']).to be_a(list_field) + + expect(fields['supplier_address']).not_to be_nil + expect(fields['supplier_address']).to be_a(object_field) + + fields.each_value do |entry| + next if entry.is_a?(simple_field) && entry.value.nil? + + case entry + when simple_field + expect(entry.value).not_to be_nil + when object_field + expect(entry.fields).not_to be_nil + when list_field + expect(entry.items).not_to be_nil + else + raise "Unknown field type: #{entry.class}" + end + end + end + + it 'loads a complete inference with valid properties' do + response = load_v2_extraction_inference(complete_path) + inference = response.inference + job = inference.job + expect(job).not_to be_nil + expect(job).to be_a(Mindee::Parsing::V2::InferenceJob) + expect(job.id).to eq('12345678-1234-1234-1234-jobid1234567') + + expect(inference).not_to be_nil + expect(inference.id).to eq('12345678-1234-1234-1234-123456789abc') + + model = inference.model + expect(model).not_to be_nil + expect(model.id).to eq('12345678-1234-1234-1234-123456789abc') + + file = inference.file + expect(file).not_to be_nil + expect(file.name).to eq('complete.jpg') + expect(file.file_alias).to be_nil + expect(file.page_count).to eq(1) + expect(file.mime_type).to eq('image/jpeg') + + active_options = inference.active_options + expect(active_options).not_to be_nil + 
expect(active_options.raw_text).to eq(false) + expect(active_options.polygon).to eq(false) + expect(active_options.confidence).to eq(false) + expect(active_options.text_context).to eq(false) + expect(active_options.rag).to eq(false) + + fields = inference.result.fields + expect(fields).not_to be_empty + expect(fields.size).to eq(21) + + date_field = fields.get_simple_field('date') + expect(date_field).to be_a(simple_field) + expect(date_field.value).to eq('2019-11-02') + + expect(fields).to have_key('taxes') + taxes = fields.get_list_field('taxes') + expect(taxes).to be_a(list_field) + + expect(taxes.items.length).to eq(1) + expect(taxes.to_s).to be_a(String) + expect(taxes.to_s).to_not be_empty + + first_tax_item = taxes.items.first + expect(first_tax_item).to be_a(object_field) + + tax_item_obj = first_tax_item + expect(tax_item_obj.fields.size).to eq(3) + + expect(fields).to have_key('line_items') + line_items = fields.get_list_field('line_items') + expect(line_items).not_to be_nil + expect(line_items).to be_a(list_field) + first_line_item = line_items.object_items[0] + expect(first_line_item).to be_a(object_field) + expect(first_line_item.get_simple_field('quantity').value).to eq(1.0) + + base_field = tax_item_obj.fields.get_simple_field('base') + expect(base_field).to be_a(simple_field) + expect(base_field.value).to eq(31.5) + + expect(fields).to have_key('supplier_address') + supplier_address = fields.get_object_field('supplier_address') + expect(supplier_address).to be_a(object_field) + expect(supplier_address.to_s).to be_a(String) + expect(supplier_address.to_s).to_not be_empty + + country_field = supplier_address.fields.get_simple_field('country') + expect(country_field).to be_a(simple_field) + expect(country_field.value).to eq('USA') + expect(country_field.to_s).to eq('USA') + + customer_addr = fields.get_object_field('customer_address') + expect(customer_addr).to be_a(object_field) + city_field = customer_addr.fields.get_simple_field('city') + 
expect(city_field).to be_a(simple_field) + expect(city_field.value).to eq('New York') + + expect(inference.result.raw_text).to be_nil + end + end + + describe 'nested' do + it 'loads a deep nested object' do + response = load_v2_extraction_inference(deep_nested_field_path) + fields = response.inference.result.fields + + expect(fields['field_simple']).to be_a(simple_field) + expect(fields['field_object']).to be_a(object_field) + + field_object = fields.get_object_field('field_object') + expect(field_object.get_simple_field('sub_object_simple')).to be_a(simple_field) + expect(field_object.get_list_field('sub_object_list')).to be_a(list_field) + expect(field_object.get_object_field('sub_object_object')).to be_a(object_field) + expect(field_object.simple_fields.length).to eq(1) + expect(field_object.list_fields.length).to eq(1) + expect(field_object.object_fields.length).to eq(1) + lvl1 = field_object.fields + expect(lvl1['sub_object_list']).to be_a(list_field) + expect(lvl1['sub_object_list'].items).not_to be_empty + expect(lvl1['sub_object_list'].items[0]).to be_a(object_field) + expect(lvl1['sub_object_object']).to be_a(object_field) + + sub_object_object = lvl1.get_object_field('sub_object_object') + lvl2 = sub_object_object.fields + + expect(lvl2['sub_object_object_sub_object_list']).to be_a(list_field) + + nested_list = lvl2.get_list_field('sub_object_object_sub_object_list') + expect(nested_list.items).not_to be_empty + expect(nested_list.items.first).to be_a(object_field) + + first_item_obj = nested_list.items.first + deep_simple = first_item_obj.fields['sub_object_object_sub_object_list_simple'] + + expect(deep_simple).to be_a(simple_field) + expect(deep_simple.value).to eq('value_9') + end + end + + describe 'standard field types' do + def load_standard_fields + response = load_v2_extraction_inference(standard_field_path) + + active_options = response.inference.active_options + expect(active_options).not_to be_nil + expect(active_options.raw_text).to eq(true) 
+ + fields = response.inference.result.fields + expect(fields).to be_a(Mindee::Parsing::V2::Field::InferenceFields) + + fields + end + + it 'recognizes simple fields' do + fields = load_standard_fields + + # low-level access + expect(fields['field_simple_string']).to be_a(simple_field) + expect(fields.get('field_simple_string')).to be_a(simple_field) + + field_simple_string = fields.get_simple_field('field_simple_string') + expect(field_simple_string).to be_a(simple_field) + expect(field_simple_string.value).to eq('field_simple_string-value') + expect(field_simple_string.confidence).to eq(field_confidence::CERTAIN) + expect(field_simple_string.to_s).to eq('field_simple_string-value') + + field_simple_int = fields.get_simple_field('field_simple_int') + expect(field_simple_int).to be_a(simple_field) + expect(field_simple_int.value).to be_a(Float) + + field_simple_float = fields.get_simple_field('field_simple_float') + expect(field_simple_float).to be_a(simple_field) + expect(field_simple_float.value).to be_a(Float) + + field_simple_bool = fields.get_simple_field('field_simple_bool') + expect(field_simple_bool).to be_a(simple_field) + expect(field_simple_bool.value).to eq(true) + expect(field_simple_bool.to_s).to eq('True') + + field_simple_null = fields.get_simple_field('field_simple_null') + expect(field_simple_null).to be_a(simple_field) + expect(field_simple_null.value).to be_nil + expect(field_simple_null.to_s).to eq('') + end + + it 'recognizes simple list fields' do + fields = load_standard_fields + + # low-level access + expect(fields['field_simple_list']).to be_a(list_field) + expect(fields.get('field_simple_list')).to be_a(list_field) + + field_simple_list = fields.get_list_field('field_simple_list') + expect(field_simple_list).to be_a(list_field) + + expect(field_simple_list.items[0]).to be_a(simple_field) + expect(field_simple_list.simple_items[0]).to be_a(simple_field) + field_simple_list.simple_items.each do |entry| + expect(entry).to be_a(simple_field) 
+ expect(entry.value).not_to be_nil + end + end + + it 'recognizes object fields' do + fields = load_standard_fields + + # low-level access + expect(fields['field_object']).to be_a(object_field) + expect(fields.get('field_object')).to be_a(object_field) + + field_object = fields.get_object_field('field_object') + expect(field_object).to be_a(object_field) + expect(field_object.get_simple_field('subfield_1')).to be_a(simple_field) + field_object.fields.each_value do |entry| + expect(entry).to be_a(simple_field) + expect(entry.value).not_to be_nil + end + end + + it 'recognizes object list fields' do + fields = load_standard_fields + + # low-level access + expect(fields['field_object_list']).to be_a(list_field) + expect(fields.get('field_object_list')).to be_a(list_field) + + field_object_list = fields.get_list_field('field_object_list') + expect(field_object_list).to be_a(list_field) + + expect(field_object_list.items[0]).to be_a(object_field) + expect(field_object_list.object_items[0]).to be_a(object_field) + field_object_list.object_items.each do |entry| + expect(entry).to be_a(object_field) + expect(entry.fields).not_to be_nil + end + end + end + + describe 'raw_text' do + it 'exposes raw texts' do + response = load_v2_extraction_inference(raw_text_json_path) + + active_options = response.inference.active_options + expect(active_options).not_to be_nil + expect(active_options.raw_text).to eq(true) + + raw_text = response.inference.result.raw_text + expect(raw_text).not_to be_nil + expect(raw_text).to be_a(Mindee::Parsing::V2::RawText) + + expect(raw_text.to_s).to eq(File.read(raw_text_str_path, encoding: 'UTF-8')) + + expect(raw_text.pages.length).to eq(2) + first = raw_text.pages.first + expect(first).to be_a(Mindee::Parsing::V2::RawTextPage) + expect(first.content).to eq('This is the raw text of the first page...') + + raw_text.pages.each do |page| + expect(page.content).to be_a(String) + end + end + end + + describe 'rst display' do + it 'is properly exposed' 
do + response = load_v2_extraction_inference(standard_field_path) + rst_string = File.read(standard_field_rst_path, encoding: 'UTF-8') + + expect(response.inference).not_to be_nil + expect(response.inference.to_s).to eq(rst_string) + end + end + + describe 'field locations and confidence' do + it 'are properly exposed' do + response = load_v2_extraction_inference(location_field_path) + + expect(response.inference).not_to be_nil + + date_field = response.inference.result.fields.get_simple_field('date') + expect(date_field).to be_a(simple_field) + expect(date_field.locations).to be_an(Array) + expect(date_field.locations[0]).not_to be_nil + expect(date_field.locations[0].page).to eq(0) + + polygon = date_field.locations[0].polygon + expect(polygon[0]).to be_a(Mindee::Geometry::Point) + + expect(polygon[0].x).to eq(0.948979073166918) + expect(polygon[0].y).to eq(0.23097924535067715) + + expect(polygon[1][0]).to eq(0.85422) + expect(polygon[1][1]).to eq(0.230072) + + expect(polygon[2][0]).to eq(0.8540899268330819) + expect(polygon[2][1]).to eq(0.24365775464932288) + + expect(polygon[3][0]).to eq(0.948849) + expect(polygon[3][1]).to eq(0.244565) + + centroid = polygon.centroid + expect(centroid.x).to eq(0.9015345) + expect(centroid.y).to eq(0.23731850000000002) + + confidence = date_field.confidence + expect(confidence).to be_a(field_confidence) + # equality + expect(confidence).to eq(field_confidence::MEDIUM) + expect(confidence).to eq('Medium') + expect(confidence).to eq(2) + # less than or equal + expect(confidence).to be_lteql(field_confidence::HIGH) + expect(confidence).to be_lteql('High') + expect(confidence).to be_lteql(3) + # greater than or equal + expect(confidence).to be_gteql(field_confidence::LOW) + expect(confidence).to be_gteql('Low') + expect(confidence).to be_gteql(1) + end + end + describe 'RAG Metadata' do + it 'when matched' do + response = load_v2_extraction_inference(rag_matched_path) + expect(response.inference).not_to be_nil + 
expect(response.inference.result.rag.retrieved_document_id).to eq('12345abc-1234-1234-1234-123456789abc') + end + + it 'when not matched' do + response = load_v2_extraction_inference(rag_not_matched_path) + expect(response.inference).not_to be_nil + expect(response.inference.result.rag.retrieved_document_id).to be_nil + end + end + + describe 'text context' do + it 'when enabled' do + response = load_v2_extraction_inference(text_context_path) + expect(response.inference).not_to be_nil + expect(response.inference.active_options.text_context).to be_truthy + end + end +end diff --git a/spec/v2/product/ocr/ocr_integration.rb b/spec/v2/product/ocr/ocr_integration.rb new file mode 100644 index 00000000..b12c3e90 --- /dev/null +++ b/spec/v2/product/ocr/ocr_integration.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +require 'mindee' +require 'mindee/v2/product' + +describe Mindee::ClientV2, :integration, :v2 do + let(:ocr_model_id) do + ENV.fetch('MINDEE_V2_SE_TESTS_OCR_MODEL_ID') + end + + let(:v2_client) do + Mindee::ClientV2.new + end + + it 'processes ocr default sample correctly' do + input_source = Mindee::Input::Source::PathInputSource.new( + File.join(V2_PRODUCT_DATA_DIR, 'ocr', 'default_sample.jpg') + ) + + params = { model_id: ocr_model_id } + + response = v2_client.enqueue_and_get_result( + Mindee::V2::Product::Ocr::Ocr, + input_source, + params + ) + + expect(response.inference).not_to be_nil + expect(response.inference.file.name).to eq('default_sample.jpg') + expect(response.inference).to be_a(Mindee::V2::Product::Ocr::OcrInference) + expect(response.inference.result).to be_a(Mindee::V2::Product::Ocr::OcrResult) + expect(response.inference.result.pages.size).to eq(1) + expect(response.inference.result.pages[0].words.size).to be > 5 + end +end diff --git a/spec/v2/product/ocr/ocr_spec.rb b/spec/v2/product/ocr/ocr_spec.rb new file mode 100644 index 00000000..039a0585 --- /dev/null +++ b/spec/v2/product/ocr/ocr_spec.rb @@ -0,0 +1,96 @@ +# 
frozen_string_literal: true + +require 'json' +require 'mindee' + +describe Mindee::V2::Product::Ocr::Ocr, :v2 do + let(:ocr_data_dir) { File.join(V2_PRODUCT_DATA_DIR, 'ocr') } + + it 'parses a single page OCR response properly' do + json_path = File.join(ocr_data_dir, 'ocr_single.json') + json_sample = JSON.parse(File.read(json_path)) + + response = Mindee::V2::Product::Ocr::OcrResponse.new(json_sample) + + expect(response.inference).to be_a(Mindee::V2::Product::Ocr::OcrInference) + expect(response.inference.result.pages).not_to be_empty + expect(response.inference.result.pages.size).to eq(1) + + page = response.inference.result.pages[0] + first_word = page.words[0] + + expect(first_word.content).to eq('Shipper:') + expect(first_word.polygon[0][0]).to eq(0.09742441209406495) + expect(first_word.polygon[0][1]).to eq(0.07007125890736342) + expect(first_word.polygon[1][0]).to eq(0.15621500559910415) + expect(first_word.polygon[1][1]).to eq(0.07046714172604909) + expect(first_word.polygon[2][0]).to eq(0.15621500559910415) + expect(first_word.polygon[2][1]).to eq(0.08155186064924783) + expect(first_word.polygon[3][0]).to eq(0.09742441209406495) + expect(first_word.polygon[3][1]).to eq(0.08155186064924783) + + expect(page.words.size).to eq(305) + expect(page.content).to eq( + 'Shipper: GLOBAL FREIGHT SOLUTIONS INC. 123 OCEAN DRIVE SHANGHAI, CHINA TEL: ' \ + "86-21-12345678 FAX: 86-21-87654321\nConsignee: PACIFIC TRADING CO. 789 TRADE " \ + "STREET SINGAPORE 567890 SINGAPORE TEL: 65-65432100 FAX: 65-65432101\nNotify " \ + "Party (Complete name and address): SAME AS CONSIGNEE\nBILL OF LADING\nJob No " \ + ".: XYZ123456\nGLOBAL SHIPPING CO\nPlace of receipt:\nSHANGHAI, CHINA\nOcean " \ + "vessel:\nGLOBAL VOYAGER V-202\nPort of loading:\nSHANGHAI, CHINA\nPort of " \ + "discharge:\nLOS ANGELES, USA\nPlace of delivery:\nLOS ANGELES, USA\nMarks and " \ + "numbers:\nP+F\n(IN DIA.)\nP/N: 12345\nDRAWING NO. 
A1B2C3\nNumber and kinds of " \ + "packages: 1CTN ELECTRONIC COMPONENTS 50 PCS\nDescription of goods:\nGross " \ + "weight:\n500 KGS\nMeasurement:\n1.5 M3\nP/O: 987654 LOT NO. " \ + "112233\nFFAU1234567/40'HQ/CFS-CFS ICTN/500KGS/1.5M3 SEAL NO:ABC1234567\nMADE " \ + "IN CHINA\nSAY TOTAL:\n2 PLTS ONLY\n\"FREIGHT COLLECT\" CFS-CFS\n** SURRENDERED " \ + "**\nFreight and Charge\nOCEAN FREIGHT\nRevenue tons\nRate\nPrepaid\nCollect\n" \ + "AS ARRANGED\nThe goods and instructions are accepted and dealt with subject " \ + 'to the Standard Conditions printed overleaf. Taken in charge in apparent good ' \ + 'order and condition, unless otherwise noted herein, at the place of receipt ' \ + 'for transport and delivery as mentioned above. One of these Combined ' \ + 'Transport Bills of Lading must be surrendered duly endorsed in exchange for ' \ + 'the goods. In Witness whereof the original Combined Transport Bills of Lading ' \ + 'all of this tenor and date have been signed in the number stated below, one ' \ + "of which being accomplished the other(s) to be void.\nUSD: 31.57 SHIPPED ON " \ + "BOARD: 30. SEP. 2022\nFreight Amount OCEAN FREIGHT\nFreight payable at\n" \ + "DESTINATION\nNumber of original\nZERO (0)\nCargo insurance\nnot covered\n" \ + "Covered according to attached Policy\nPlace and date of issue\nTAIPEI, " \ + "TAIWAN: 30. SEP. 2022\nFor delivery of goods please apply to: INTERNATIONAL " \ + 'LOGISTICS LTD 456 SHIPPING LANE LOS ANGELES, CA 90001 USA TEL:1-213-9876543 ' \ + "FAX:1-213-9876544 ATTN: MR. 
JOHN DOE\nSignature: GLOBAL SHIPPING CO., " \ + "LTD.\nBY\nAS CARRIER" + ) + end + + it 'parses a multiple page OCR response properly' do + json_path = File.join(ocr_data_dir, 'ocr_multiple.json') + json_sample = JSON.parse(File.read(json_path)) + + response = Mindee::V2::Product::Ocr::OcrResponse.new(json_sample) + + expect(response.inference).to be_a(Mindee::V2::Product::Ocr::OcrInference) + expect(response.inference.result).to be_a(Mindee::V2::Product::Ocr::OcrResult) + expect(response.inference.result.pages[0]).to be_a(Mindee::V2::Product::Ocr::OcrPage) + expect(response.inference.result.pages.size).to eq(3) + + page_zero_words = response.inference.result.pages[0].words + expect(page_zero_words.size).to eq(295) + expect(page_zero_words[0].content).to eq('FICTIOCORP') + expect(page_zero_words[0].polygon[0][0]).to eq(0.06649402824332337) + expect(page_zero_words[0].polygon[0][1]).to eq(0.03957449719523875) + expect(page_zero_words[0].polygon[1][0]).to eq(0.23219061218068954) + expect(page_zero_words[0].polygon[1][1]).to eq(0.03960015049938432) + expect(page_zero_words[0].polygon[2][0]).to eq(0.23219061218068954) + expect(page_zero_words[0].polygon[2][1]).to eq(0.06770762074155151) + expect(page_zero_words[0].polygon[3][0]).to eq(0.06649402824332337) + expect(page_zero_words[0].polygon[3][1]).to eq(0.06770762074155151) + + page_one_words = response.inference.result.pages[1].words + expect(page_one_words.size).to eq(450) + expect(page_one_words[0].content).to eq('KEOLIO') + + page_two_words = response.inference.result.pages[2].words + expect(page_two_words.size).to eq(355) + expect(page_two_words[0].content).to eq('KEOLIO') + end +end diff --git a/spec/v2/product/split/split_integration.rb b/spec/v2/product/split/split_integration.rb new file mode 100644 index 00000000..adbfc852 --- /dev/null +++ b/spec/v2/product/split/split_integration.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +require 'mindee' +require 'mindee/v2/product' + +describe Mindee::ClientV2, 
:integration, :v2 do + let(:split_model_id) do + ENV.fetch('MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID') + end + + let(:v2_client) do + Mindee::ClientV2.new + end + + it 'processes split default sample correctly' do + input_source = Mindee::Input::Source::PathInputSource.new( + File.join(V2_PRODUCT_DATA_DIR, 'split', 'default_sample.pdf') + ) + + params = { model_id: split_model_id } + + response = v2_client.enqueue_and_get_result( + Mindee::V2::Product::Split::Split, + input_source, + params + ) + + expect(response.inference).not_to be_nil + expect(response.inference.file.name).to eq('default_sample.pdf') + expect(response.inference.result.splits).not_to be_empty + expect(response.inference.result.splits.size).to eq(2) + end +end diff --git a/spec/v2/product/split/split_spec.rb b/spec/v2/product/split/split_spec.rb new file mode 100644 index 00000000..e946ee28 --- /dev/null +++ b/spec/v2/product/split/split_spec.rb @@ -0,0 +1,53 @@ +# frozen_string_literal: true + +require 'json' +require 'mindee' +require 'mindee/v2/product' + +describe Mindee::V2::Product::Split::Split, :v2 do + let(:split_data_dir) { File.join(V2_PRODUCT_DATA_DIR, 'split') } + + it 'parses a single split properly' do + json_path = File.join(split_data_dir, 'split_single.json') + json_sample = JSON.parse(File.read(json_path)) + + response = Mindee::V2::Product::Split::SplitResponse.new(json_sample) + + expect(response.inference).to be_a(Mindee::V2::Product::Split::SplitInference) + expect(response.inference.result.splits).not_to be_empty + expect(response.inference.result.splits[0].page_range.size).to eq(2) + expect(response.inference.result.splits[0].page_range[0]).to eq(0) + expect(response.inference.result.splits[0].page_range[1]).to eq(0) + expect(response.inference.result.splits[0].document_type).to eq('receipt') + end + + it 'parses multiple splits properly' do + json_path = File.join(split_data_dir, 'split_multiple.json') + json_sample = JSON.parse(File.read(json_path)) + + response = 
Mindee::V2::Product::Split::SplitResponse.new(json_sample) + + expect(response.inference).to be_a(Mindee::V2::Product::Split::SplitInference) + expect(response.inference.result).to be_a(Mindee::V2::Product::Split::SplitResult) + expect(response.inference.result.splits[0]).to be_a(Mindee::V2::Product::Split::SplitRange) + expect(response.inference.result.splits.size).to eq(3) + + split_zero = response.inference.result.splits[0] + expect(split_zero.page_range.size).to eq(2) + expect(split_zero.page_range[0]).to eq(0) + expect(split_zero.page_range[1]).to eq(0) + expect(split_zero.document_type).to eq('invoice') + + split_one = response.inference.result.splits[1] + expect(split_one.page_range.size).to eq(2) + expect(split_one.page_range[0]).to eq(1) + expect(split_one.page_range[1]).to eq(3) + expect(split_one.document_type).to eq('invoice') + + split_two = response.inference.result.splits[2] + expect(split_two.page_range.size).to eq(2) + expect(split_two.page_range[0]).to eq(4) + expect(split_two.page_range[1]).to eq(4) + expect(split_two.document_type).to eq('invoice') + end +end