From 121746b9b66af1484ae92a4ebbbb4a34918e70d7 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Tue, 24 Feb 2026 14:04:29 +0100 Subject: [PATCH 01/23] temp --- .github/workflows/_test-code-samples.yml | 2 +- .github/workflows/_test-integrations.yml | 1 + lib/mindee.rb | 4 + lib/mindee/client_v2.rb | 44 ++++++-- lib/mindee/errors/mindee_http_error_v2.rb | 4 +- lib/mindee/http/mindee_api_v2.rb | 35 +++++- lib/mindee/input/base_parameters.rb | 93 ++++++++++++++++ lib/mindee/input/inference_parameters.rb | 108 ++++++------------- lib/mindee/input/local_response.rb | 4 +- lib/mindee/parsing/v2/inference.rb | 17 +-- lib/mindee/parsing/v2/inference_response.rb | 12 ++- lib/mindee/v2.rb | 1 + lib/mindee/v2/parsing.rb | 1 + lib/mindee/v2/parsing/base_inference.rb | 26 +++++ lib/mindee/v2/parsing/base_response.rb | 18 ++++ sig/mindee/client_v2.rbs | 21 ++-- sig/mindee/errors/mindee_http_error_v2.rbs | 2 +- sig/mindee/http/mindee_api_v2.rbs | 9 ++ sig/mindee/input/base_parameters.rbs | 28 +++++ sig/mindee/input/inference_parameters.rbs | 13 +-- sig/mindee/input/local_response.rbs | 2 +- sig/mindee/parsing/v2/inference.rbs | 7 +- sig/mindee/parsing/v2/inference_response.rbs | 9 +- sig/mindee/v2/parsing/base_inference.rbs | 16 +++ sig/mindee/v2/parsing/base_response.rbs | 13 +++ 25 files changed, 354 insertions(+), 136 deletions(-) create mode 100644 lib/mindee/input/base_parameters.rb create mode 100644 lib/mindee/v2.rb create mode 100644 lib/mindee/v2/parsing.rb create mode 100644 lib/mindee/v2/parsing/base_inference.rb create mode 100644 lib/mindee/v2/parsing/base_response.rb create mode 100644 sig/mindee/input/base_parameters.rbs create mode 100644 sig/mindee/v2/parsing/base_inference.rbs create mode 100644 sig/mindee/v2/parsing/base_response.rbs diff --git a/.github/workflows/_test-code-samples.yml b/.github/workflows/_test-code-samples.yml index ca03e5b8..f00db7e3 100644 --- a/.github/workflows/_test-code-samples.yml 
+++ b/.github/workflows/_test-code-samples.yml @@ -32,4 +32,4 @@ jobs: env: MINDEE_LOG_LEVEL: DEBUG run: | - ./spec/test_code_samples.sh ${{ secrets.MINDEE_ACCOUNT_SE_TESTS }} ${{ secrets.MINDEE_ENDPOINT_SE_TESTS }} ${{ secrets.MINDEE_API_KEY_SE_TESTS }} ${{ secrets.MINDEE_V2_SE_TESTS_API_KEY }} ${{ secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }} + ./spec/test_code_samples.sh ${{ secrets.MINDEE_ACCOUNT_SE_TESTS }} ${{ secrets.MINDEE_ENDPOINT_SE_TESTS }} ${{ secrets.MINDEE_API_KEY_SE_TESTS }} ${{ secrets.MINDEE_V2_SE_TESTS_API_KEY }} ${{ secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }} {{secrets.MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID}} diff --git a/.github/workflows/_test-integrations.yml b/.github/workflows/_test-integrations.yml index 72ad67cd..0259f138 100644 --- a/.github/workflows/_test-integrations.yml +++ b/.github/workflows/_test-integrations.yml @@ -56,6 +56,7 @@ jobs: MINDEE_V2_API_KEY: ${{ secrets.MINDEE_V2_SE_TESTS_API_KEY }} MINDEE_V2_FINDOC_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }} MINDEE_V2_SE_TESTS_BLANK_PDF_URL: ${{ secrets.MINDEE_V2_SE_TESTS_BLANK_PDF_URL }} + MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID }} MINDEE_LOG_LEVEL: DEBUG run: | bundle exec rake integration diff --git a/lib/mindee.rb b/lib/mindee.rb index 05378eb4..f885eacc 100644 --- a/lib/mindee.rb +++ b/lib/mindee.rb @@ -84,6 +84,10 @@ module IND module US end end + + # V2-specific module. + module V2 + end end # Shorthand to call the logger from anywhere. diff --git a/lib/mindee/client_v2.rb b/lib/mindee/client_v2.rb index f70be6a4..57ef00b3 100644 --- a/lib/mindee/client_v2.rb +++ b/lib/mindee/client_v2.rb @@ -36,22 +36,28 @@ def get_job(job_id) # Enqueue a document for async parsing. # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource] # The source of the input document (local file or URL). 
- # @param params [Hash, InferenceParameters] + # @param params [Hash, BaseParameters] # @return [Mindee::Parsing::V2::JobResponse] def enqueue_inference(input_source, params) - normalized_params = normalize_inference_parameters(params) + normalized_params = normalize_inference_parameters(Input::InferenceParameters, params) logger.debug("Enqueueing document to model '#{normalized_params.model_id}'.") @mindee_api.req_post_inference_enqueue(input_source, normalized_params) end - # Enqueue a document for async parsing and automatically try to retrieve it. + # Enqueues to an asynchronous endpoint and automatically polls for a response. + # + # @param response_type [Mindee::V2::BaseResponse] The return class. # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource] # The source of the input document (local file or URL). # @param params [Hash, InferenceParameters] Parameters for the inference. - # @return [Mindee::Parsing::V2::InferenceResponse] - def enqueue_and_get_inference(input_source, params) - normalized_params = normalize_inference_parameters(params) + # @return [Mindee::Parsing::Common::ApiResponse] + def enqueue_and_get_result( + response_type, + input_source, + params + ) + normalized_params = normalize_inference_parameters(response_type._params_type, params) normalized_params.validate_async_params enqueue_response = enqueue_inference(input_source, normalized_params) @@ -97,13 +103,29 @@ def enqueue_and_get_inference(input_source, params) "Asynchronous parsing request timed out after #{sec_count} seconds" end + # Enqueue a document for async parsing and automatically try to retrieve it. + # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource] + # The source of the input document (local file or URL). + # @param params [Hash, InferenceParameters] Parameters for the inference. 
+ # @return [Mindee::Parsing::V2::InferenceResponse] + def enqueue_and_get_inference(input_source, params) + warn '[DEPRECATION] `enqueue_and_get_inference` is deprecated; use `enqueue_and_get_result` instead.' + + response = enqueue_and_get_result(Mindee::Parsing::V2::InferenceResponse, input_source, params) + unless response.is_a?(Mindee::Parsing::V2::InferenceResponse) + raise TypeError, "Invalid response type \"#{response.class}\"" + end + + response + end + # If needed, converts the parsing options provided as a hash into a proper InferenceParameters object. - # @param params [Hash, InferenceParameters] Params. - # @return [InferenceParameters] - def normalize_inference_parameters(params) - return params if params.is_a?(Input::InferenceParameters) + # @param params [Hash, Class] Params. + # @return [BaseParameters] + def normalize_inference_parameters(param_class, params) + return param_class.from_hash(params: params) if params.is_a?(Hash) - Input::InferenceParameters.from_hash(params: params) + params end end end diff --git a/lib/mindee/errors/mindee_http_error_v2.rb b/lib/mindee/errors/mindee_http_error_v2.rb index 2cde4df5..90af794a 100644 --- a/lib/mindee/errors/mindee_http_error_v2.rb +++ b/lib/mindee/errors/mindee_http_error_v2.rb @@ -18,7 +18,7 @@ class MindeeHTTPErrorV2 < MindeeError # @return [Array] A list of explicit error details. 
attr_reader :errors - # @param http_error [Hash, Parsing::V2::ErrorResponse] + # @param http_error [Hash, Mindee::Parsing::V2::ErrorResponse] def initialize(http_error) if http_error.is_a?(Parsing::V2::ErrorResponse) http_error = { 'detail' => http_error.detail, @@ -33,7 +33,7 @@ def initialize(http_error) @code = http_error['code'] @errors = if http_error.key?('errors') http_error['errors'].map do |error| - Parsing::V2::ErrorItem.new(error) + Mindee::Parsing::V2::ErrorItem.new(error) end else [] diff --git a/lib/mindee/http/mindee_api_v2.rb b/lib/mindee/http/mindee_api_v2.rb index 8465f03d..0e239464 100644 --- a/lib/mindee/http/mindee_api_v2.rb +++ b/lib/mindee/http/mindee_api_v2.rb @@ -29,7 +29,7 @@ def req_post_inference_enqueue(input_source, params) input_source, params ) - Parsing::V2::JobResponse.new(process_response(response)) + Mindee::Parsing::V2::JobResponse.new(process_response(response)) end # Retrieves a queued inference. @@ -37,11 +37,19 @@ def req_post_inference_enqueue(input_source, params) # @param inference_id [String] # @return [Mindee::Parsing::V2::InferenceResponse] def req_get_inference(inference_id) + req_get_result(Mindee::Parsing::V2::InferenceResponse, inference_id) + end + + # Retrieves a result from a given queue. + # @param response_class [Class] + # @param inference_id [String] + # @return [Mindee::Parsing::V2::BaseResponse] + def req_get_result(response_class, inference_id) @settings.check_api_key response = inference_result_req_get( inference_id ) - Parsing::V2::InferenceResponse.new(process_response(response)) + response_class.new(process_response(response)) end # Retrieves a queued job. @@ -53,7 +61,28 @@ def req_get_job(job_id) response = inference_job_req_get( job_id ) - Parsing::V2::JobResponse.new(process_response(response)) + Mindee::Parsing::V2::JobResponse.new(process_response(response)) + end + + # Retrieves a queued job. 
+ # + # @param url [String] + # @return [Mindee::Parsing::V2::JobResponse] + def req_get_job_url(url) + @settings.check_api_key + response = poll(url) + Mindee::Parsing::V2::JobResponse.new(process_response(response)) + end + + # Retrieves a queued job. + # + # @param result_class [Mindee::V2::Parsing::BaseResponse] + # @param url [String] + # @return [Mindee::Parsing::V2::JobResponse] + def req_get_result_url(result_class, url) + @settings.check_api_key + response = poll(url) + result_class.new(process_response(response)) end private diff --git a/lib/mindee/input/base_parameters.rb b/lib/mindee/input/base_parameters.rb new file mode 100644 index 00000000..9a989c3e --- /dev/null +++ b/lib/mindee/input/base_parameters.rb @@ -0,0 +1,93 @@ +# frozen_string_literal: true + +module Mindee + module Input + # Base class for parameters accepted by all V2 endpoints. + class BaseParameters + # @return [String] ID of the model (required). + attr_reader :model_id + + # @return [String, nil] Optional alias for the file. + attr_reader :file_alias + + # @return [Array, nil] Optional list of Webhooks IDs to propagate the API response to. + attr_reader :webhook_ids + + # @return [PollingOptions] Options for polling. Set only if having timeout issues. + attr_reader :polling_options + + # @return [Boolean, nil] Whether to close the file after parsing. + attr_reader :close_file + + # @return [String] Slug for the endpoint. + + # @param [String] model_id ID of the model + # @param [String, nil] file_alias File alias, if applicable. + # @param [Array, nil] webhook_ids + # @param [Hash, nil] polling_options + # @param [Boolean, nil] close_file + def initialize( + model_id, + file_alias: nil, + webhook_ids: nil, + polling_options: nil, + close_file: true + ) + raise Errors::MindeeInputError, 'Model ID is required.' if model_id.empty? || model_id.nil? 
+ + @model_id = model_id + @file_alias = file_alias + @webhook_ids = webhook_ids || [] + @polling_options = get_clean_polling_options(polling_options) + @close_file = close_file.nil? || close_file + end + + # Loads a prediction from a Hash. + # @param [Hash] params Parameters to provide as a hash. + # @return [Hash] + def self.from_hash(params: {}) + params.transform_keys!(&:to_sym) + + if params.empty? || params[:model_id].nil? || params[:model_id].empty? + raise Errors::MindeeInputError, 'Model ID is required.' + end + + polling_options_input = params.fetch(:page_options, PollingOptions.new) + if polling_options_input.is_a?(Hash) + polling_options_input = polling_options_input.transform_keys(&:to_sym) + PollingOptions.new( + initial_delay_sec: polling_options_input.fetch(:initial_delay_sec, 2.0), + delay_sec: polling_options_input.fetch(:delay_sec, 1.5), + max_retries: polling_options_input.fetch(:max_retries, 80) + ) + end + params + end + + private + + # Cleans a proper polling options object potentially from a hash. + # @param [Hash, PollingOptions, nil] polling_options Polling options. + # @return [PollingOptions] Valid polling options object. 
+ def get_clean_polling_options(polling_options) + return PollingOptions.new if polling_options.is_a?(PollingOptions) + + if polling_options.is_a?(Hash) + polling_options = polling_options.transform_keys(&:to_sym) + output_polling_options = PollingOptions.new( + initial_delay_sec: polling_options.fetch(:initial_delay_sec, 2.0), + delay_sec: polling_options.fetch(:delay_sec, 1.5), + max_retries: polling_options.fetch(:max_retries, 80) + ) + else + output_polling_options = if polling_options.is_a?(PollingOptions) + polling_options || PollingOptions.new + else + PollingOptions.new + end + end + output_polling_options + end + end + end +end diff --git a/lib/mindee/input/inference_parameters.rb b/lib/mindee/input/inference_parameters.rb index 1a38fca4..4c7f9e82 100644 --- a/lib/mindee/input/inference_parameters.rb +++ b/lib/mindee/input/inference_parameters.rb @@ -1,14 +1,12 @@ # frozen_string_literal: true require_relative 'data_schema' +require_relative '../input/base_parameters' module Mindee module Input # Parameters to set when sending a file for inference. - class InferenceParameters - # @return [String] ID of the model (required). - attr_reader :model_id - + class InferenceParameters < Mindee::Input::BaseParameters # @return [Boolean, nil] Enhance extraction accuracy with Retrieval-Augmented Generation. attr_reader :rag @@ -24,35 +22,27 @@ class InferenceParameters # Calculate confidence scores for all fields, and fill their confidence attribute. attr_reader :confidence - # @return [String, nil] Optional alias for the file. - attr_reader :file_alias - # @return [String, nil] Additional text context used by the model during inference. # Not recommended, for specific use only. attr_reader :text_context - # @return [Array, nil] Optional list of Webhooks IDs to propagate the API response to. - attr_reader :webhook_ids - - # @return [PollingOptions] Options for polling. Set only if having timeout issues. 
- attr_reader :polling_options - # @return [DataSchemaField] attr_reader :data_schema - # @return [Boolean, nil] Whether to close the file after parsing. - attr_reader :close_file + @_slug = 'extraction' # rubocop:disable Metrics/ParameterLists # @param [String] model_id ID of the model - # @param [nil] rag Whether to enable RAG. - # @param [nil] raw_text Whether to enable rax text. - # @param [nil] polygon Whether to enable polygons. - # @param [nil] confidence Whether to enable confidence scores. - # @param [nil] file_alias File alias, if applicable. - # @param [nil] webhook_ids - # @param [nil] polling_options - # @param [TrueClass] close_file + # @param [Boolean, nil] rag Whether to enable RAG. + # @param [Boolean, nil] raw_text Whether to enable rax text. + # @param [Boolean, nil] polygon Whether to enable polygons. + # @param [Boolean, nil] confidence Whether to enable confidence scores. + # @param [String, nil] file_alias File alias, if applicable. + # @param [Array, nil] webhook_ids + # @param [String, nil] text_context + # @param [Hash, nil] polling_options + # @param [Boolean, nil] close_file + # @param [DataSchemaField, String, Hash nil] data_schema def initialize( model_id, rag: nil, @@ -66,18 +56,19 @@ def initialize( close_file: true, data_schema: nil ) - raise Errors::MindeeInputError, 'Model ID is required.' if model_id.empty? || model_id.nil? + super( + model_id, + file_alias: file_alias, + webhook_ids: webhook_ids, + polling_options: polling_options, + close_file: close_file + ) - @model_id = model_id @rag = rag @raw_text = raw_text @polygon = polygon @confidence = confidence - @file_alias = file_alias - @webhook_ids = webhook_ids || [] @text_context = text_context - @polling_options = get_clean_polling_options(polling_options) - @close_file = close_file.nil? || close_file @data_schema = DataSchema.new(data_schema) unless data_schema.nil? 
# rubocop:enable Metrics/ParameterLists end @@ -106,56 +97,23 @@ def validate_async_params # @param [Hash] params Parameters to provide as a hash. # @return [InferenceParameters] def self.from_hash(params: {}) - params.transform_keys!(&:to_sym) - - if params.empty? || params[:model_id].nil? || params[:model_id].empty? - raise Errors::MindeeInputError, 'Model ID is required.' - end - - model_id = params.fetch(:model_id) rag = params.fetch(:rag, nil) raw_text = params.fetch(:raw_text, nil) polygon = params.fetch(:polygon, nil) confidence = params.fetch(:confidence, nil) - file_alias = params.fetch(:file_alias, nil) - webhook_ids = params.fetch(:webhook_ids, []) - polling_options_input = params.fetch(:page_options, PollingOptions.new) - if polling_options_input.is_a?(Hash) - polling_options_input = polling_options_input.transform_keys(&:to_sym) - PollingOptions.new( - initial_delay_sec: polling_options_input.fetch(:initial_delay_sec, 2.0), - delay_sec: polling_options_input.fetch(:delay_sec, 1.5), - max_retries: polling_options_input.fetch(:max_retries, 80) - ) - end - close_file = params.fetch(:close_file, true) - InferenceParameters.new(model_id, rag: rag, raw_text: raw_text, polygon: polygon, confidence: confidence, - file_alias: file_alias, webhook_ids: webhook_ids, close_file: close_file) - end - - private - - # Cleans a proper polling options object potentially from a hash. - # @param [Hash, PollingOptions, nil] polling_options Polling options. - # @return [PollingOptions] Valid polling options object. 
- def get_clean_polling_options(polling_options) - return PollingOptions.new if polling_options.is_a?(PollingOptions) - - if polling_options.is_a?(Hash) - polling_options = polling_options.transform_keys(&:to_sym) - output_polling_options = PollingOptions.new( - initial_delay_sec: polling_options.fetch(:initial_delay_sec, 2.0), - delay_sec: polling_options.fetch(:delay_sec, 1.5), - max_retries: polling_options.fetch(:max_retries, 80) - ) - else - output_polling_options = if polling_options.is_a?(PollingOptions) - polling_options || PollingOptions.new - else - PollingOptions.new - end - end - output_polling_options + base_params = super + new_params = base_params.merge(rag: rag, raw_text: raw_text, polygon: polygon, confidence: confidence) + model_id = new_params.fetch(:model_id) + + InferenceParameters.new( + model_id, rag: rag, + raw_text: raw_text, + polygon: polygon, + confidence: confidence, + file_alias: params.fetch(:file_alias, nil), + webhook_ids: params.fetch(:webhook_ids, nil), + close_file: params.fetch(:close_file, true) + ) end end end diff --git a/lib/mindee/input/local_response.rb b/lib/mindee/input/local_response.rb index ca8988ab..7d4b5779 100644 --- a/lib/mindee/input/local_response.rb +++ b/lib/mindee/input/local_response.rb @@ -70,9 +70,9 @@ def valid_hmac_signature?(secret_key, signature) # Deserializes a loaded response # @param response_class [Parsing::V2::CommonResponse] class to return. - # @return [Parsing::V2::JobResponse, Parsing::V2::InferenceResponse] + # @return [Parsing::V2::JobResponse, Mindee::Parsing::V2::InferenceResponse] def deserialize_response(response_class) - response_class.new(as_hash) # : Parsing::V2::JobResponse | Parsing::V2::InferenceResponse + response_class.new(as_hash) # : Mindee::Parsing::V2::JobResponse | Mindee::Parsing::V2::InferenceResponse rescue StandardError raise Errors::MindeeInputError, 'Invalid response provided.' 
end diff --git a/lib/mindee/parsing/v2/inference.rb b/lib/mindee/parsing/v2/inference.rb index b462fc51..b4bb975c 100644 --- a/lib/mindee/parsing/v2/inference.rb +++ b/lib/mindee/parsing/v2/inference.rb @@ -5,20 +5,13 @@ require_relative 'inference_file' require_relative 'inference_result' require_relative 'inference_active_options' +require_relative '../../v2/parsing/base_inference' module Mindee module Parsing module V2 # Complete data returned by an inference request. - class Inference - # @return [String] Identifier of the inference (when provided by API). - attr_reader :id - # @return [InferenceJob] Metadata about the job. - attr_reader :job - # @return [InferenceModel] Information about the model used. - attr_reader :model - # @return [InferenceFile] Information about the processed file. - attr_reader :file + class Inference < Mindee::V2::Parsing::BaseInference # @return [InferenceActiveOptions] Options which were activated during the inference. attr_reader :active_options # @return [InferenceResult] Result contents. @@ -28,13 +21,9 @@ class Inference def initialize(server_response) raise ArgumentError, 'server_response must be a Hash' unless server_response.is_a?(Hash) - @model = InferenceModel.new(server_response['model']) - @job = InferenceJob.new(server_response['job']) if server_response.key?('job') - @file = InferenceFile.new(server_response['file']) + super @active_options = InferenceActiveOptions.new(server_response['active_options']) @result = InferenceResult.new(server_response['result']) - - @id = server_response['id'] end # String representation. 
diff --git a/lib/mindee/parsing/v2/inference_response.rb b/lib/mindee/parsing/v2/inference_response.rb index 1aac55a8..bcdcfe05 100644 --- a/lib/mindee/parsing/v2/inference_response.rb +++ b/lib/mindee/parsing/v2/inference_response.rb @@ -2,18 +2,21 @@ require_relative 'common_response' require_relative 'inference' +require_relative '../../v2/parsing/base_response' module Mindee module Parsing module V2 # HTTP response wrapper that embeds a V2 Inference. - class InferenceResponse < CommonResponse + class InferenceResponse < Mindee::V2::Parsing::BaseResponse # @return [Inference] Parsed inference payload. attr_reader :inference + @_slug = 'extraction/results' + @_params_type = Input::InferenceParameters + # @param server_response [Hash] Hash parsed from the API JSON response. def initialize(server_response) - # CommonResponse takes care of the generic metadata (status, etc.) super @inference = Inference.new(server_response['inference']) @@ -24,6 +27,11 @@ def initialize(server_response) def to_s @inference.to_s end + + # @return[String] Getter for the inference slug. + def self.result_slug + @_slug + end end end end diff --git a/lib/mindee/v2.rb b/lib/mindee/v2.rb new file mode 100644 index 00000000..8e9b8f90 --- /dev/null +++ b/lib/mindee/v2.rb @@ -0,0 +1 @@ +# frozen_string_literal: true diff --git a/lib/mindee/v2/parsing.rb b/lib/mindee/v2/parsing.rb new file mode 100644 index 00000000..8e9b8f90 --- /dev/null +++ b/lib/mindee/v2/parsing.rb @@ -0,0 +1 @@ +# frozen_string_literal: true diff --git a/lib/mindee/v2/parsing/base_inference.rb b/lib/mindee/v2/parsing/base_inference.rb new file mode 100644 index 00000000..6bb247b0 --- /dev/null +++ b/lib/mindee/v2/parsing/base_inference.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +module Mindee + module V2 + module Parsing + # Base class for V2 inference responses. + class BaseInference + # @return [InferenceJob] Metadata about the job. 
+ attr_reader :job + # @return [Parsing::V2::InferenceModel] Model info for the inference. + attr_reader :model + # @return [Parsing::V2::InferenceFile] File info for the inference. + attr_reader :file + # @return [String] ID of the inference. + attr_reader :id + + def initialize(http_response) + @model = Mindee::Parsing::V2::InferenceModel.new(http_response['model']) + @file = Mindee::Parsing::V2::InferenceFile.new(http_response['file']) + @id = http_response['id'] + @job = Mindee::Parsing::V2::InferenceJob.new(http_response['job']) if http_response.key?('job') + end + end + end + end +end diff --git a/lib/mindee/v2/parsing/base_response.rb b/lib/mindee/v2/parsing/base_response.rb new file mode 100644 index 00000000..7c310486 --- /dev/null +++ b/lib/mindee/v2/parsing/base_response.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true + +module Mindee + module V2 + module Parsing + # Base class for V2 inference responses. + class BaseResponse < Mindee::Parsing::V2::CommonResponse + # @return [BaseInference] The inference result for a split utility request + attr_reader :inference + + # @return [String] The slug of the endpoint used for this response + @_slug = 'extraction/results' + # @return [Class] The class of the parameters used for this response + @_params_type + end + end + end +end diff --git a/sig/mindee/client_v2.rbs b/sig/mindee/client_v2.rbs index d33cdbb5..56b82357 100644 --- a/sig/mindee/client_v2.rbs +++ b/sig/mindee/client_v2.rbs @@ -2,16 +2,25 @@ OTS_OWNER: String module Mindee + interface _ParametersFactory[T] + def new: (Hash[String | Symbol, untyped]) -> T + def from_hash: (params: untyped) -> T + end class ClientV2 attr_reader mindee_api: HTTP::MindeeApiV2 def logger: () -> Logger def initialize: (?api_key: String) -> void - def get_inference: (String) -> Parsing::V2::InferenceResponse - def get_job: (String) -> Parsing::V2::JobResponse - def enqueue_inference: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, 
untyped] | Input::InferenceParameters) -> Parsing::V2::JobResponse - def enqueue_and_get_inference: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | Input::InferenceParameters) -> Parsing::V2::InferenceResponse - def validate_async_params: (Integer | Float, Integer | Float, Integer) -> void - def normalize_inference_parameters: (Hash[String | Symbol, untyped] | Input::InferenceParameters) -> Input::InferenceParameters + def get_inference: (String) -> Mindee::Parsing::V2::InferenceResponse + def get_job: (String) -> Mindee::Parsing::V2::JobResponse + def enqueue_inference: [T < Input::BaseParameters] (Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | T) -> Mindee::Parsing::V2::JobResponse + def enqueue_and_get_result: [T < Input::BaseParameters] ( + singleton(Mindee::V2::Parsing::BaseResponse), + Input::Source::LocalInputSource | Input::Source::URLInputSource, + Hash[String | Symbol, untyped] | T + ) -> Mindee::V2::Parsing::BaseResponse[untyped] + def enqueue_and_get_inference: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | Input::InferenceParameters) -> Mindee::Parsing::V2::InferenceResponse + def validate_async_params: (Integer | Float, Integer | Float, Integer) -> void + def normalize_inference_parameters: [T < Input::BaseParameters] (_ParametersFactory[T], Hash[String | Symbol, untyped] | T) -> T end end diff --git a/sig/mindee/errors/mindee_http_error_v2.rbs b/sig/mindee/errors/mindee_http_error_v2.rbs index b6b3d924..02e403a1 100644 --- a/sig/mindee/errors/mindee_http_error_v2.rbs +++ b/sig/mindee/errors/mindee_http_error_v2.rbs @@ -9,7 +9,7 @@ module Mindee attr_reader title: String attr_reader errors: Array[Parsing::V2::ErrorItem] - def initialize: (Hash[String, untyped] | Parsing::V2::ErrorResponse) -> void + def initialize: (Hash[String, untyped] | Mindee::Parsing::V2::ErrorResponse) -> void end end end diff 
--git a/sig/mindee/http/mindee_api_v2.rbs b/sig/mindee/http/mindee_api_v2.rbs index 80f6db4f..8aa5f85e 100644 --- a/sig/mindee/http/mindee_api_v2.rbs +++ b/sig/mindee/http/mindee_api_v2.rbs @@ -1,10 +1,19 @@ # lib/mindee/http/mindee_api_v2.rb module Mindee module HTTP + interface _ResponseFactory[T] + def new: (Hash[String | Symbol, untyped]) -> T + end + class MindeeApiV2 attr_reader settings: ApiSettingsV2 def initialize: (?api_key: String?) -> void + + def req_get_job_url: (String) -> Parsing::V2::JobResponse + def req_get_result: [T] (_ResponseFactory[T] response_class, String inference_id) -> T + def req_get_result_url: [T] (_ResponseFactory[T] result_class, String url) -> T + def req_post_inference_enqueue: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Input::InferenceParameters) -> Parsing::V2::JobResponse def req_get_inference: (String) -> Parsing::V2::InferenceResponse def req_get_job: (String) -> Parsing::V2::JobResponse diff --git a/sig/mindee/input/base_parameters.rbs b/sig/mindee/input/base_parameters.rbs new file mode 100644 index 00000000..4cb13d0f --- /dev/null +++ b/sig/mindee/input/base_parameters.rbs @@ -0,0 +1,28 @@ +# lib/mindee/input/base_parameters.rb + +module Mindee + module Input + class BaseParameters + def self.from_hash: (params: Hash[String | Symbol, untyped]) -> Hash[String | Symbol, untyped] + + attr_reader model_id: String + attr_reader file_alias: String? + attr_reader webhook_ids: Array[String]? + attr_reader polling_options: PollingOptions + attr_reader close_file: bool + @slug: String + + def initialize: ( + String, + ?file_alias: String?, + ?webhook_ids: Array[String]?, + ?polling_options: Hash[Symbol | String, untyped] | PollingOptions?, + ?close_file: bool? + ) -> void + + private + + def get_clean_polling_options: (Hash[String | Symbol, untyped] | PollingOptions?) 
-> PollingOptions + end + end +end diff --git a/sig/mindee/input/inference_parameters.rbs b/sig/mindee/input/inference_parameters.rbs index 1e929c5f..d351b97f 100644 --- a/sig/mindee/input/inference_parameters.rbs +++ b/sig/mindee/input/inference_parameters.rbs @@ -1,17 +1,14 @@ # lib/mindee/input/inference_parameters.rb module Mindee module Input - class InferenceParameters - attr_reader close_file: bool + class InferenceParameters < BaseParameters + self.@_slug: String + attr_reader confidence: bool? - attr_reader file_alias: String? - attr_reader model_id: String - attr_reader polling_options: PollingOptions attr_reader polygon: bool? attr_reader rag: bool? attr_reader raw_text: bool? attr_reader text_context: String? - attr_reader webhook_ids: Array[String]? attr_reader data_schema: DataSchema? def initialize: ( @@ -31,10 +28,6 @@ module Mindee def self.from_hash: (params: Hash[String | Symbol, untyped]) -> InferenceParameters def validate_async_params: -> void - - private - - def get_clean_polling_options: (Hash[String | Symbol, untyped] | PollingOptions?) 
-> PollingOptions end end end diff --git a/sig/mindee/input/local_response.rbs b/sig/mindee/input/local_response.rbs index fd28bd5f..5de48877 100644 --- a/sig/mindee/input/local_response.rbs +++ b/sig/mindee/input/local_response.rbs @@ -8,7 +8,7 @@ module Mindee def self.process_secret_key: (String) -> String def get_hmac_signature: (String) -> String def valid_hmac_signature?: (String, String) -> bool - def deserialize_response: (singleton(Parsing::V2::CommonResponse))-> (Parsing::V2::JobResponse | Parsing::V2::InferenceResponse) + def deserialize_response: (singleton(Parsing::V2::CommonResponse))-> (Parsing::V2::JobResponse | Mindee::Parsing::V2::InferenceResponse) end end end diff --git a/sig/mindee/parsing/v2/inference.rbs b/sig/mindee/parsing/v2/inference.rbs index cbdc794c..e95fc833 100644 --- a/sig/mindee/parsing/v2/inference.rbs +++ b/sig/mindee/parsing/v2/inference.rbs @@ -2,12 +2,7 @@ module Mindee module Parsing module V2 - class Inference - - attr_reader id: String - attr_reader job: InferenceJob - attr_reader model: InferenceModel - attr_reader file: InferenceFile + class Inference < Mindee::V2::Parsing::BaseInference attr_reader active_options: InferenceActiveOptions attr_reader result: InferenceResult diff --git a/sig/mindee/parsing/v2/inference_response.rbs b/sig/mindee/parsing/v2/inference_response.rbs index 7254f996..194af4fc 100644 --- a/sig/mindee/parsing/v2/inference_response.rbs +++ b/sig/mindee/parsing/v2/inference_response.rbs @@ -2,10 +2,15 @@ module Mindee module Parsing module V2 - class InferenceResponse < CommonResponse - attr_reader inference: V2::Inference + class InferenceResponse < Mindee::V2::Parsing::BaseResponse[Mindee::Parsing::V2::Inference] + + self.@_slug: String + self.@_params_type: singleton(Input::BaseParameters) + + attr_reader inference: Mindee::Parsing::V2::Inference def initialize: (Hash[String | Symbol, untyped]) -> void def to_s: -> String + def self.result_slug: -> String end end end diff --git 
a/sig/mindee/v2/parsing/base_inference.rbs b/sig/mindee/v2/parsing/base_inference.rbs new file mode 100644 index 00000000..4c2feb91 --- /dev/null +++ b/sig/mindee/v2/parsing/base_inference.rbs @@ -0,0 +1,16 @@ +# lib/mindee/v2/parsing/base_inference.rb + +module Mindee + module V2 + module Parsing + class BaseInference + attr_reader job: Mindee::Parsing::V2::InferenceJob + attr_reader file: Mindee::Parsing::V2::InferenceFile + attr_reader id: String + attr_reader model: Mindee::Parsing::V2::InferenceModel + + def initialize: (Hash[String | Symbol, untyped]) -> void + end + end + end +end diff --git a/sig/mindee/v2/parsing/base_response.rbs b/sig/mindee/v2/parsing/base_response.rbs new file mode 100644 index 00000000..5b9cbc89 --- /dev/null +++ b/sig/mindee/v2/parsing/base_response.rbs @@ -0,0 +1,13 @@ +# lib/mindee/v2/parsing/base_response.rb + +module Mindee + module V2 + module Parsing + class BaseResponse[T] < Mindee::Parsing::V2::CommonResponse + self.@_slug: String + self.@_params_type: singleton(Input::BaseParameters) + attr_reader inference: T + end + end + end +end From 289270e8bdd4939b9c870969a91d3a5b46188f26 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Wed, 25 Feb 2026 12:16:29 +0100 Subject: [PATCH 02/23] implement v2 base (no tests) - steep & rubocop OK --- lib/mindee/client_v2.rb | 38 +++++++++++++++++------ lib/mindee/http/mindee_api_v2.rb | 18 ++++++++--- lib/mindee/input/base_parameters.rb | 33 +++++++++++++++++++- lib/mindee/input/inference_parameters.rb | 18 ++++++++++- sig/mindee/client_v2.rbs | 10 +++--- sig/mindee/http/mindee_api_v2.rbs | 7 +++-- sig/mindee/input/base_parameters.rbs | 13 ++++++-- sig/mindee/input/inference_parameters.rbs | 1 + sig/mindee/v2/parsing/base_response.rbs | 6 ++++ 9 files changed, 120 insertions(+), 24 deletions(-) diff --git a/lib/mindee/client_v2.rb b/lib/mindee/client_v2.rb index 57ef00b3..01dbdf3b 100644 --- a/lib/mindee/client_v2.rb +++ 
b/lib/mindee/client_v2.rb @@ -26,6 +26,14 @@ def get_inference(inference_id) @mindee_api.req_get_inference(inference_id) end + # Retrieves a result from a given queue. + # @param inference_id [String] + # @param response_class [Class] + # @return [Mindee::Parsing::V2::BaseResponse] + def get_result(response_class, inference_id) + @mindee_api.req_get_result(response_class, inference_id) + end + # Retrieves an inference. # @param job_id [String] # @return [Mindee::Parsing::V2::JobResponse] @@ -36,13 +44,25 @@ def get_job(job_id) # Enqueue a document for async parsing. # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource] # The source of the input document (local file or URL). - # @param params [Hash, BaseParameters] + # @param params [Hash, InferenceParameters] + # @return [Mindee::Parsing::V2::JobResponse] + def enqueue_inference(input_source, params, disable_redundant_warnings: false) + unless disable_redundant_warnings + warn '[DEPRECATION] `enqueue_inference` is deprecated; use `enqueue` instead.', uplevel: 1 + end + normalized_params = normalize_parameters(Input::InferenceParameters, params) + enqueue(input_source, normalized_params) + end + + # Enqueue a document for async parsing. + # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource] + # The source of the input document (local file or URL). 
+ # @param params [BaseParameters] # @return [Mindee::Parsing::V2::JobResponse] - def enqueue_inference(input_source, params) - normalized_params = normalize_inference_parameters(Input::InferenceParameters, params) - logger.debug("Enqueueing document to model '#{normalized_params.model_id}'.") + def enqueue(input_source, params) + logger.debug("Enqueueing document to model '#{params.model_id}'.") - @mindee_api.req_post_inference_enqueue(input_source, normalized_params) + @mindee_api.req_post_enqueue(input_source, params) end # Enqueues to an asynchronous endpoint and automatically polls for a response. @@ -57,9 +77,9 @@ def enqueue_and_get_result( input_source, params ) - normalized_params = normalize_inference_parameters(response_type._params_type, params) + normalized_params = normalize_parameters(response_type._params_type, params) normalized_params.validate_async_params - enqueue_response = enqueue_inference(input_source, normalized_params) + enqueue_response = enqueue(input_source, normalized_params) if enqueue_response.job.id.nil? || enqueue_response.job.id.empty? logger.error("Failed enqueueing:\n#{enqueue_response.raw_http}") @@ -77,7 +97,7 @@ def enqueue_and_get_result( if poll_results.job.status == 'Failed' break elsif poll_results.job.status == 'Processed' - return get_inference(poll_results.job.id) + return get_result(response_type, poll_results.job.id) end logger.debug( @@ -122,7 +142,7 @@ def enqueue_and_get_inference(input_source, params) # If needed, converts the parsing options provided as a hash into a proper InferenceParameters object. # @param params [Hash, Class] Params. 
# @return [BaseParameters] - def normalize_inference_parameters(param_class, params) + def normalize_parameters(param_class, params) return param_class.from_hash(params: params) if params.is_a?(Hash) params diff --git a/lib/mindee/http/mindee_api_v2.rb b/lib/mindee/http/mindee_api_v2.rb index 0e239464..d3b32e63 100644 --- a/lib/mindee/http/mindee_api_v2.rb +++ b/lib/mindee/http/mindee_api_v2.rb @@ -23,7 +23,7 @@ def initialize(api_key: nil) # @param params [Input::InferenceParameters] # @return [Mindee::Parsing::V2::JobResponse] # @raise [Mindee::Errors::MindeeHttpErrorV2] - def req_post_inference_enqueue(input_source, params) + def req_post_enqueue(input_source, params) @settings.check_api_key response = enqueue( input_source, @@ -140,6 +140,15 @@ def inference_result_req_get(queue_id) poll("#{@settings.base_url}/inferences/#{queue_id}") end + # Polls the API for the result of an inference. + # + # @param queue_id [String] ID of the queue. + # @param response_class [Class] + # @return [Net::HTTPResponse] + def result_req_get(queue_id, response_class) + poll("#{@settings.base_url}/products/#{response_class._slug}/results/#{queue_id}") + end + # Handle parameters for the enqueue form # @param form_data [Array] Array of form fields # @param params [Input::InferenceParameters] Inference options. @@ -159,10 +168,10 @@ def enqueue_form_options(form_data, params) end # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource] - # @param params [Input::InferenceParameters] Inference options. + # @param params [Input::BaseParameters] Inference options. 
# @return [Net::HTTPResponse, nil] def enqueue(input_source, params) - uri = URI("#{@settings.base_url}/inferences/enqueue") + uri = URI("#{@settings.base_url}/products/#{params._slug}/enqueue") form_data = if input_source.is_a?(Mindee::Input::Source::URLInputSource) [['url', input_source.url]] # : Array[untyped] @@ -172,8 +181,7 @@ def enqueue(input_source, params) end form_data.push(['model_id', params.model_id]) - # deal with other parameters - form_data = enqueue_form_options(form_data, params) + form_data = params.append_form_data(form_data) headers = { 'Authorization' => @settings.api_key, diff --git a/lib/mindee/input/base_parameters.rb b/lib/mindee/input/base_parameters.rb index 9a989c3e..0e17f06f 100644 --- a/lib/mindee/input/base_parameters.rb +++ b/lib/mindee/input/base_parameters.rb @@ -20,6 +20,7 @@ class BaseParameters attr_reader :close_file # @return [String] Slug for the endpoint. + @_slug = '' # @param [String] model_id ID of the model # @param [String, nil] file_alias File alias, if applicable. @@ -42,10 +43,21 @@ def initialize( @close_file = close_file.nil? || close_file end + def self.from_hash(params: {}) + load_from_hash(params: params) + new( + params[:model_id], + file_alias: params[:file_alias], + webhook_ids: params[:webhook_ids], + polling_options: params[:polling_options], + close_file: params[:close_file] + ) + end + # Loads a prediction from a Hash. # @param [Hash] params Parameters to provide as a hash. # @return [Hash] - def self.from_hash(params: {}) + def self.load_from_hash(params: {}) params.transform_keys!(&:to_sym) if params.empty? || params[:model_id].nil? || params[:model_id].empty? @@ -64,6 +76,25 @@ def self.from_hash(params: {}) params end + # Appends base form data to the provided array. 
+ # @param [Array] form_data Array of form fields + # @return [Array] + def append_form_data(form_data) + form_data.push(['file_alias', @file_alias]) if @file_alias + webhook_ids = @webhook_ids || [] + form_data.push(['webhook_ids', webhook_ids.join(',')]) unless @webhook_ids.nil? || webhook_ids.empty? + form_data + end + + # @return [String] Slug for the endpoint. + def _slug + if self == BaseParameters + raise NotImplementedError, 'Cannot access `slug` directly on the BaseParameters class. Please use a subclass.' + end + + @_slug + end + private # Cleans a proper polling options object potentially from a hash. diff --git a/lib/mindee/input/inference_parameters.rb b/lib/mindee/input/inference_parameters.rb index 4c7f9e82..be74bde0 100644 --- a/lib/mindee/input/inference_parameters.rb +++ b/lib/mindee/input/inference_parameters.rb @@ -73,6 +73,22 @@ def initialize( # rubocop:enable Metrics/ParameterLists end + # Appends inference-specific form data to the provided array. + # @param [Array] form_data Array of form fields + # @return [Array] + def append_form_data(form_data) + new_form_data = super + + new_form_data.push(['rag', @rag.to_s]) unless @rag.nil? + new_form_data.push(['raw_text', @raw_text.to_s]) unless @raw_text.nil? + new_form_data.push(['polygon', @polygon.to_s]) unless @polygon.nil? + new_form_data.push(['confidence', @confidence.to_s]) unless @confidence.nil? 
+ new_form_data.push(['text_context', @text_context]) if @text_context + new_form_data.push(['data_schema', @data_schema.to_s]) if @data_schema + + new_form_data + end + # Validates the parameters for async auto-polling def validate_async_params min_delay_sec = 1 @@ -101,7 +117,7 @@ def self.from_hash(params: {}) raw_text = params.fetch(:raw_text, nil) polygon = params.fetch(:polygon, nil) confidence = params.fetch(:confidence, nil) - base_params = super + base_params = load_from_hash(params: params) new_params = base_params.merge(rag: rag, raw_text: raw_text, polygon: polygon, confidence: confidence) model_id = new_params.fetch(:model_id) diff --git a/sig/mindee/client_v2.rbs b/sig/mindee/client_v2.rbs index 56b82357..42dd931a 100644 --- a/sig/mindee/client_v2.rbs +++ b/sig/mindee/client_v2.rbs @@ -3,24 +3,26 @@ OTS_OWNER: String module Mindee interface _ParametersFactory[T] - def new: (Hash[String | Symbol, untyped]) -> T def from_hash: (params: untyped) -> T end class ClientV2 attr_reader mindee_api: HTTP::MindeeApiV2 + def logger: () -> Logger def initialize: (?api_key: String) -> void def get_inference: (String) -> Mindee::Parsing::V2::InferenceResponse + def get_result: [T] (HTTP::_ResponseFactory[T] response_class, String inference_id) -> T def get_job: (String) -> Mindee::Parsing::V2::JobResponse - def enqueue_inference: [T < Input::BaseParameters] (Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | T) -> Mindee::Parsing::V2::JobResponse - def enqueue_and_get_result: [T < Input::BaseParameters] ( + def enqueue_inference: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | Input::InferenceParameters, disable_redundant_warnings: bool) -> Mindee::Parsing::V2::JobResponse + def enqueue: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Input::BaseParameters) -> Mindee::Parsing::V2::JobResponse + def enqueue_and_get_result: [T < Input::BaseParameters] ( 
singleton(Mindee::V2::Parsing::BaseResponse), Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | T ) -> Mindee::V2::Parsing::BaseResponse[untyped] def enqueue_and_get_inference: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | Input::InferenceParameters) -> Mindee::Parsing::V2::InferenceResponse def validate_async_params: (Integer | Float, Integer | Float, Integer) -> void - def normalize_inference_parameters: [T < Input::BaseParameters] (_ParametersFactory[T], Hash[String | Symbol, untyped] | T) -> T + def normalize_parameters: [T < Input::BaseParameters] (_ParametersFactory[T], Hash[String | Symbol, untyped] | T) -> T end end diff --git a/sig/mindee/http/mindee_api_v2.rbs b/sig/mindee/http/mindee_api_v2.rbs index 8aa5f85e..d08effbe 100644 --- a/sig/mindee/http/mindee_api_v2.rbs +++ b/sig/mindee/http/mindee_api_v2.rbs @@ -3,6 +3,7 @@ module Mindee module HTTP interface _ResponseFactory[T] def new: (Hash[String | Symbol, untyped]) -> T + def _slug: () -> String end class MindeeApiV2 @@ -14,18 +15,20 @@ module Mindee def req_get_result: [T] (_ResponseFactory[T] response_class, String inference_id) -> T def req_get_result_url: [T] (_ResponseFactory[T] result_class, String url) -> T - def req_post_inference_enqueue: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Input::InferenceParameters) -> Parsing::V2::JobResponse + def req_post_enqueue: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Input::BaseParameters) -> Parsing::V2::JobResponse def req_get_inference: (String) -> Parsing::V2::InferenceResponse def req_get_job: (String) -> Parsing::V2::JobResponse def process_response: (Net::HTTPResponse?) 
-> Hash[String | Symbol, untyped] def poll: (String) -> Net::HTTPResponse def inference_job_req_get: (String) -> Net::HTTPResponse def inference_result_req_get: (String) -> Net::HTTPResponse - def enqueue: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Input::InferenceParameters) -> Net::HTTPResponse? + def result_req_get: [T] (String, _ResponseFactory[T] result_class) -> Net::HTTPResponse + def enqueue: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Input::BaseParameters) -> Net::HTTPResponse? private def enqueue_form_options: (Array[untyped], Input::InferenceParameters) -> Array[untyped] + end end end diff --git a/sig/mindee/input/base_parameters.rbs b/sig/mindee/input/base_parameters.rbs index 4cb13d0f..ef9586b1 100644 --- a/sig/mindee/input/base_parameters.rbs +++ b/sig/mindee/input/base_parameters.rbs @@ -3,14 +3,19 @@ module Mindee module Input class BaseParameters - def self.from_hash: (params: Hash[String | Symbol, untyped]) -> Hash[String | Symbol, untyped] + self.@_slug: String + + def self.from_hash: (params: Hash[String | Symbol, untyped]) -> instance + def self.load_from_hash: (params: Hash[String | Symbol, untyped]) -> Hash[String | Symbol, untyped] + + def _slug: -> String attr_reader model_id: String attr_reader file_alias: String? attr_reader webhook_ids: Array[String]? attr_reader polling_options: PollingOptions attr_reader close_file: bool - @slug: String + self.@slug: String def initialize: ( String, @@ -20,6 +25,10 @@ module Mindee ?close_file: bool? ) -> void + def append_form_data: (Array[Array[untyped]]) -> Array[Array[untyped]] + + def validate_async_params: () -> void + private def get_clean_polling_options: (Hash[String | Symbol, untyped] | PollingOptions?) 
-> PollingOptions diff --git a/sig/mindee/input/inference_parameters.rbs b/sig/mindee/input/inference_parameters.rbs index d351b97f..bff0c25c 100644 --- a/sig/mindee/input/inference_parameters.rbs +++ b/sig/mindee/input/inference_parameters.rbs @@ -26,6 +26,7 @@ module Mindee ) -> void def self.from_hash: (params: Hash[String | Symbol, untyped]) -> InferenceParameters + def append_form_data: (Array[Array[untyped]]) -> Array[Array[untyped]] def validate_async_params: -> void end diff --git a/sig/mindee/v2/parsing/base_response.rbs b/sig/mindee/v2/parsing/base_response.rbs index 5b9cbc89..765ea228 100644 --- a/sig/mindee/v2/parsing/base_response.rbs +++ b/sig/mindee/v2/parsing/base_response.rbs @@ -3,9 +3,15 @@ module Mindee module V2 module Parsing + interface _ParametersFactory[T] + def new: (Hash[String | Symbol, untyped]) -> T + def from_hash: (params: untyped) -> T + end class BaseResponse[T] < Mindee::Parsing::V2::CommonResponse self.@_slug: String self.@_params_type: singleton(Input::BaseParameters) + def self._params_type: () -> singleton(Input::BaseParameters) + def self._slug: () -> String attr_reader inference: T end end From 8322a71d1afb59964d5abf3abf9d90f76e1aa8b4 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Wed, 25 Feb 2026 12:24:01 +0100 Subject: [PATCH 03/23] fix test --- lib/mindee/parsing/v2/inference_response.rb | 5 ----- lib/mindee/v2/parsing/base_response.rb | 8 ++++++-- sig/mindee/parsing/v2/inference_response.rbs | 6 +++++- sig/mindee/v2/parsing/base_response.rbs | 1 + 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/lib/mindee/parsing/v2/inference_response.rb b/lib/mindee/parsing/v2/inference_response.rb index bcdcfe05..f1806387 100644 --- a/lib/mindee/parsing/v2/inference_response.rb +++ b/lib/mindee/parsing/v2/inference_response.rb @@ -27,11 +27,6 @@ def initialize(server_response) def to_s @inference.to_s end - - # @return[String] Getter for the inference slug. 
- def self.result_slug - @_slug - end end end end diff --git a/lib/mindee/v2/parsing/base_response.rb b/lib/mindee/v2/parsing/base_response.rb index 7c310486..68e0a68c 100644 --- a/lib/mindee/v2/parsing/base_response.rb +++ b/lib/mindee/v2/parsing/base_response.rb @@ -9,9 +9,13 @@ class BaseResponse < Mindee::Parsing::V2::CommonResponse attr_reader :inference # @return [String] The slug of the endpoint used for this response - @_slug = 'extraction/results' + @_slug = '' # @return [Class] The class of the parameters used for this response - @_params_type + @_params_type = Mindee::Input::BaseParameters + + class << self + attr_reader :_params_type, :_slug + end end end end diff --git a/sig/mindee/parsing/v2/inference_response.rbs b/sig/mindee/parsing/v2/inference_response.rbs index 194af4fc..1160781f 100644 --- a/sig/mindee/parsing/v2/inference_response.rbs +++ b/sig/mindee/parsing/v2/inference_response.rbs @@ -9,8 +9,12 @@ module Mindee attr_reader inference: Mindee::Parsing::V2::Inference def initialize: (Hash[String | Symbol, untyped]) -> void + + def _params_type: -> singleton(Input::BaseParameters) + def to_s: -> String - def self.result_slug: -> String + def self._params_type: () -> singleton(Input::BaseParameters) + def self._slug: () -> String end end end diff --git a/sig/mindee/v2/parsing/base_response.rbs b/sig/mindee/v2/parsing/base_response.rbs index 765ea228..5cdcb0e1 100644 --- a/sig/mindee/v2/parsing/base_response.rbs +++ b/sig/mindee/v2/parsing/base_response.rbs @@ -10,6 +10,7 @@ module Mindee class BaseResponse[T] < Mindee::Parsing::V2::CommonResponse self.@_slug: String self.@_params_type: singleton(Input::BaseParameters) + def self._params_type: () -> singleton(Input::BaseParameters) def self._slug: () -> String attr_reader inference: T From 5a779681b18547639707e8d71d9ecea020ab039b Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Wed, 25 Feb 2026 12:56:30 +0100 Subject: [PATCH 04/23] add crop 
(no tests) --- lib/mindee/input/base_parameters.rb | 26 +++++++++-- lib/mindee/input/inference_parameters.rb | 22 +-------- lib/mindee/parsing/v2/inference.rb | 2 - lib/mindee/parsing/v2/inference_response.rb | 2 +- lib/mindee/v2/parsing/base_inference.rb | 2 + .../v2/product/crop/Params/crop_parameters.rb | 45 +++++++++++++++++++ lib/mindee/v2/product/crop/crop_inference.rb | 32 +++++++++++++ lib/mindee/v2/product/crop/crop_item.rb | 29 ++++++++++++ lib/mindee/v2/product/crop/crop_response.rb | 33 ++++++++++++++ lib/mindee/v2/product/crop/crop_result.rb | 34 ++++++++++++++ sig/mindee/input/inference_parameters.rbs | 2 - sig/mindee/parsing/v2/inference_response.rbs | 4 +- sig/mindee/v2/product/crop/crop_inference.rbs | 14 ++++++ sig/mindee/v2/product/crop/crop_item.rbs | 15 +++++++ sig/mindee/v2/product/crop/crop_response.rbs | 23 ++++++++++ sig/mindee/v2/product/crop/crop_result.rbs | 14 ++++++ .../crop_parameters/crop_parameters.rbs | 25 +++++++++++ 17 files changed, 293 insertions(+), 31 deletions(-) create mode 100644 lib/mindee/v2/product/crop/Params/crop_parameters.rb create mode 100644 lib/mindee/v2/product/crop/crop_inference.rb create mode 100644 lib/mindee/v2/product/crop/crop_item.rb create mode 100644 lib/mindee/v2/product/crop/crop_response.rb create mode 100644 lib/mindee/v2/product/crop/crop_result.rb create mode 100644 sig/mindee/v2/product/crop/crop_inference.rbs create mode 100644 sig/mindee/v2/product/crop/crop_item.rbs create mode 100644 sig/mindee/v2/product/crop/crop_response.rbs create mode 100644 sig/mindee/v2/product/crop/crop_result.rbs create mode 100644 sig/mindee/v2/product/crop/params/crop_parameters/crop_parameters.rbs diff --git a/lib/mindee/input/base_parameters.rb b/lib/mindee/input/base_parameters.rb index 0e17f06f..15eaebf5 100644 --- a/lib/mindee/input/base_parameters.rb +++ b/lib/mindee/input/base_parameters.rb @@ -24,9 +24,9 @@ class BaseParameters # @param [String] model_id ID of the model # @param [String, nil] file_alias 
File alias, if applicable. - # @param [Array, nil] webhook_ids - # @param [Hash, nil] polling_options - # @param [Boolean, nil] close_file + # @param [Array, nil] webhook_ids List of webhook IDs to propagate the API response to. + # @param [Hash, nil] polling_options Options for polling. Set only if having timeout issues. + # @param [Boolean, nil] close_file Whether to close the file after parsing. def initialize( model_id, file_alias: nil, @@ -95,6 +95,26 @@ def _slug @_slug end + # Validates the parameters for async auto-polling + def validate_async_params + min_delay_sec = 1 + min_initial_delay_sec = 1 + min_retries = 2 + + if @polling_options.delay_sec < min_delay_sec + raise ArgumentError, + "Cannot set auto-poll delay to less than #{min_delay_sec} second(s)" + end + if @polling_options.initial_delay_sec < min_initial_delay_sec + raise ArgumentError, + "Cannot set initial parsing delay to less than #{min_initial_delay_sec} second(s)" + end + return unless @polling_options.max_retries < min_retries + + raise ArgumentError, + "Cannot set auto-poll retries to less than #{min_retries}" + end + private # Cleans a proper polling options object potentially from a hash. 
diff --git a/lib/mindee/input/inference_parameters.rb b/lib/mindee/input/inference_parameters.rb index be74bde0..060d015d 100644 --- a/lib/mindee/input/inference_parameters.rb +++ b/lib/mindee/input/inference_parameters.rb @@ -89,27 +89,7 @@ def append_form_data(form_data) new_form_data end - # Validates the parameters for async auto-polling - def validate_async_params - min_delay_sec = 1 - min_initial_delay_sec = 1 - min_retries = 2 - - if @polling_options.delay_sec < min_delay_sec - raise ArgumentError, - "Cannot set auto-poll delay to less than #{min_delay_sec} second(s)" - end - if @polling_options.initial_delay_sec < min_initial_delay_sec - raise ArgumentError, - "Cannot set initial parsing delay to less than #{min_initial_delay_sec} second(s)" - end - return unless @polling_options.max_retries < min_retries - - raise ArgumentError, - "Cannot set auto-poll retries to less than #{min_retries}" - end - - # Loads a prediction from a Hash. + # Loads the parameters from a Hash. # @param [Hash] params Parameters to provide as a hash. # @return [InferenceParameters] def self.from_hash(params: {}) diff --git a/lib/mindee/parsing/v2/inference.rb b/lib/mindee/parsing/v2/inference.rb index b4bb975c..495134bd 100644 --- a/lib/mindee/parsing/v2/inference.rb +++ b/lib/mindee/parsing/v2/inference.rb @@ -19,8 +19,6 @@ class Inference < Mindee::V2::Parsing::BaseInference # @param server_response [Hash] Hash representation of the JSON returned by the service. 
def initialize(server_response) - raise ArgumentError, 'server_response must be a Hash' unless server_response.is_a?(Hash) - super @active_options = InferenceActiveOptions.new(server_response['active_options']) @result = InferenceResult.new(server_response['result']) diff --git a/lib/mindee/parsing/v2/inference_response.rb b/lib/mindee/parsing/v2/inference_response.rb index f1806387..fd1d2750 100644 --- a/lib/mindee/parsing/v2/inference_response.rb +++ b/lib/mindee/parsing/v2/inference_response.rb @@ -12,7 +12,7 @@ class InferenceResponse < Mindee::V2::Parsing::BaseResponse # @return [Inference] Parsed inference payload. attr_reader :inference - @_slug = 'extraction/results' + @_slug = 'extraction' @_params_type = Input::InferenceParameters # @param server_response [Hash] Hash parsed from the API JSON response. diff --git a/lib/mindee/v2/parsing/base_inference.rb b/lib/mindee/v2/parsing/base_inference.rb index 6bb247b0..0a9daebb 100644 --- a/lib/mindee/v2/parsing/base_inference.rb +++ b/lib/mindee/v2/parsing/base_inference.rb @@ -15,6 +15,8 @@ class BaseInference attr_reader :id def initialize(http_response) + raise ArgumentError, 'Server response must be a Hash' unless http_response.is_a?(Hash) + @model = Mindee::Parsing::V2::InferenceModel.new(http_response['model']) @file = Mindee::Parsing::V2::InferenceFile.new(http_response['file']) @id = http_response['id'] diff --git a/lib/mindee/v2/product/crop/Params/crop_parameters.rb b/lib/mindee/v2/product/crop/Params/crop_parameters.rb new file mode 100644 index 00000000..82a0288a --- /dev/null +++ b/lib/mindee/v2/product/crop/Params/crop_parameters.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +module Mindee + module V2 + module Product + module Crop + module Params + class CropParameters < Input::BaseParameters + # Parameters accepted by the crop utility v2 endpoint. 
+ class CropParameters < Mindee::Input::BaseParameters + @_slug = 'crop' + + # @param [String] model_id ID of the model + # @param [String, nil] file_alias File alias, if applicable. + # @param [Array, nil] webhook_ids List of webhook IDs to propagate the API response to. + # @param [Hash, nil] polling_options Options for polling. Set only if having timeout issues. + # @param [Boolean, nil] close_file Whether to close the file after parsing. + def initialize( + model_id, + file_alias: nil, + webhook_ids: nil, + polling_options: nil, + close_file: true + ) + super + end + + # Loads the parameters from a Hash. + # @param [Hash] params Parameters to provide as a hash. + # @return [CropParameters] + def self.from_hash(params: {}) + CropParameters.new( + params.fetch(:model_id), + file_alias: params.fetch(:file_alias, nil), + webhook_ids: params.fetch(:webhook_ids, nil), + close_file: params.fetch(:close_file, true) + ) + end + end + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/crop/crop_inference.rb b/lib/mindee/v2/product/crop/crop_inference.rb new file mode 100644 index 00000000..841a1c7d --- /dev/null +++ b/lib/mindee/v2/product/crop/crop_inference.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +module Mindee + module V2 + module Product + module Crop + # The inference result for a crop utility request. + class CropInference + # @return [CropResult] Parsed inference payload. + attr_reader :result + + # @param server_response [Hash] Hash representation of the JSON returned by the service. + def initialize(server_response) + super + + @result = CropResult.new(server_response['result']) + end + + # String representation. 
+ # @return [String] + def to_s + [ + super, + @result, + '', + ].join("\n") + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/crop/crop_item.rb b/lib/mindee/v2/product/crop/crop_item.rb new file mode 100644 index 00000000..036b8c43 --- /dev/null +++ b/lib/mindee/v2/product/crop/crop_item.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +module Mindee + module V2 + module Product + module Crop + # Result of a cropped document region. + class CropItem + # @return [String] Type or classification of the detected object. + attr_reader :object_type + # @return [Parsing::V2::Field::FieldLocation] Coordinates of the detected object on the document. + attr_reader :location + + # @param server_response [Hash] Hash representation of the JSON returned by the service. + def initialize(server_response) + @object_type = server_response['object_type'] + @location = Mindee::Parsing::V2::Field::FieldLocation.new(server_response['location']) + end + + # String representation. + # @return [String] + def to_s + "* :Location: #{location}\n :Object Type: #{object_type}" + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/crop/crop_response.rb b/lib/mindee/v2/product/crop/crop_response.rb new file mode 100644 index 00000000..09a809bb --- /dev/null +++ b/lib/mindee/v2/product/crop/crop_response.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +require_relative '../../parsing/base_response' + +module Mindee + module V2 + module Product + module Crop + # HTTP response wrapper that embeds a V2 Inference. + class CropResponse < Mindee::V2::Parsing::BaseResponse + # @return [CropInference] Parsed inference payload. + attr_reader :inference + + @_slug = 'crop' + @_params_type = Params::CropParameters + + # @param server_response [Hash] Hash parsed from the API JSON response. + def initialize(server_response) + super + + @inference = CropInference.new(server_response['inference']) + end + + # String representation. 
+ # @return [String] + def to_s + @inference.to_s + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/crop/crop_result.rb b/lib/mindee/v2/product/crop/crop_result.rb new file mode 100644 index 00000000..8135041c --- /dev/null +++ b/lib/mindee/v2/product/crop/crop_result.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +module Mindee + module V2 + module Product + module Crop + # Result of a crop utility inference. + class CropResult + # @return [Array] List of results of cropped document regions. + attr_reader :crops + + # @param server_response [Hash] Hash representation of the JSON returned by the service. + def initialize(server_response) + @crops = if server_response.key?('crops') + server_response['crop'].map do |crop| + CropItem.new(crop) + end + else + [] + end + end + + # String representation. + # @return [String] + def to_s + crops_str = @crops.join("\n") + + "Crops\n=====\n#{crops_str}" + end + end + end + end + end +end diff --git a/sig/mindee/input/inference_parameters.rbs b/sig/mindee/input/inference_parameters.rbs index bff0c25c..6d0f1404 100644 --- a/sig/mindee/input/inference_parameters.rbs +++ b/sig/mindee/input/inference_parameters.rbs @@ -27,8 +27,6 @@ module Mindee def self.from_hash: (params: Hash[String | Symbol, untyped]) -> InferenceParameters def append_form_data: (Array[Array[untyped]]) -> Array[Array[untyped]] - - def validate_async_params: -> void end end end diff --git a/sig/mindee/parsing/v2/inference_response.rbs b/sig/mindee/parsing/v2/inference_response.rbs index 1160781f..ae90cf40 100644 --- a/sig/mindee/parsing/v2/inference_response.rbs +++ b/sig/mindee/parsing/v2/inference_response.rbs @@ -10,10 +10,10 @@ module Mindee attr_reader inference: Mindee::Parsing::V2::Inference def initialize: (Hash[String | Symbol, untyped]) -> void - def _params_type: -> singleton(Input::BaseParameters) + def _params_type: -> singleton(Input::InferenceParameters) def to_s: -> String - def self._params_type: () -> 
singleton(Input::BaseParameters) + def self._params_type: () -> singleton(Input::InferenceParameters) def self._slug: () -> String end end diff --git a/sig/mindee/v2/product/crop/crop_inference.rbs b/sig/mindee/v2/product/crop/crop_inference.rbs new file mode 100644 index 00000000..63ea5b93 --- /dev/null +++ b/sig/mindee/v2/product/crop/crop_inference.rbs @@ -0,0 +1,14 @@ +module Mindee + module V2 + module Product + module Crop + class CropInference + attr_reader result: CropResult + + def initialize: (Hash[String | Symbol, untyped]) -> void + def to_s: -> String + end + end + end + end +end diff --git a/sig/mindee/v2/product/crop/crop_item.rbs b/sig/mindee/v2/product/crop/crop_item.rbs new file mode 100644 index 00000000..1e3e7998 --- /dev/null +++ b/sig/mindee/v2/product/crop/crop_item.rbs @@ -0,0 +1,15 @@ +module Mindee + module V2 + module Product + module Crop + class CropItem + attr_reader object_type: String + attr_reader location: Mindee::Parsing::V2::Field::FieldLocation + + def initialize: (Hash[String | Symbol, untyped]) -> void + def to_s: -> String + end + end + end + end +end diff --git a/sig/mindee/v2/product/crop/crop_response.rbs b/sig/mindee/v2/product/crop/crop_response.rbs new file mode 100644 index 00000000..1e5663ef --- /dev/null +++ b/sig/mindee/v2/product/crop/crop_response.rbs @@ -0,0 +1,23 @@ +# lib/mindee/v2/product/crop/crop_response.rb + +module Mindee + module V2 + module Product + module Crop + class CropResponse + self.@_slug: String + self.@_params_type: singleton(Params::CropParameters) + + attr_reader inference: Mindee::V2::Product::Crop::CropInference + def initialize: (Hash[String | Symbol, untyped]) -> void + + def _params_type: -> singleton(Params::CropParameters) + + def to_s: -> String + def self._params_type: () -> singleton(Params::CropParameters) + def self._slug: () -> String + end + end + end + end +end diff --git a/sig/mindee/v2/product/crop/crop_result.rbs b/sig/mindee/v2/product/crop/crop_result.rbs new file mode 
100644 index 00000000..8e06576b --- /dev/null +++ b/sig/mindee/v2/product/crop/crop_result.rbs @@ -0,0 +1,14 @@ +module Mindee + module V2 + module Product + module Crop + class CropResult + @crops: Array[CropItem] + + def initialize: (Hash[String | Symbol, untyped]) -> void + def to_s: -> String + end + end + end + end +end diff --git a/sig/mindee/v2/product/crop/params/crop_parameters/crop_parameters.rbs b/sig/mindee/v2/product/crop/params/crop_parameters/crop_parameters.rbs new file mode 100644 index 00000000..0b451590 --- /dev/null +++ b/sig/mindee/v2/product/crop/params/crop_parameters/crop_parameters.rbs @@ -0,0 +1,25 @@ +module Mindee + module V2 + module Product + module Crop + module Params + module CropParameters + class CropParameters + self.@_slug: String + + def self.from_hash: (params: Hash[String | Symbol, untyped]) -> CropParameters + + def initialize: ( + String, + ?file_alias: String?, + ?webhook_ids: Array[String]?, + ?polling_options: Hash[Symbol | String, untyped] | Input::PollingOptions?, + ?close_file: bool? 
+ ) -> void + end + end + end + end + end + end +end From 3e3144c07a0eebb11f143c01a4310a1cdab9652d Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Wed, 25 Feb 2026 13:29:25 +0100 Subject: [PATCH 05/23] add (unit) test --- lib/mindee.rb | 3 + lib/mindee/geometry/point.rb | 5 ++ lib/mindee/geometry/polygon.rb | 5 ++ lib/mindee/parsing/v2/field/field_location.rb | 2 +- lib/mindee/v2.rb | 3 + lib/mindee/v2/product.rb | 4 + lib/mindee/v2/product/crop/crop_inference.rb | 12 ++- lib/mindee/v2/product/crop/crop_response.rb | 4 +- lib/mindee/v2/product/crop/crop_result.rb | 6 +- .../{Params => params}/crop_parameters.rb | 0 sig/mindee/geometry/point.rbs | 2 + sig/mindee/geometry/polygon.rbs | 2 + sig/mindee/v2/product/crop/crop_result.rbs | 2 +- spec/data.rb | 1 + spec/v2/client_v2_integration.rb | 2 +- spec/v2/input/inference_parameter_spec.rb | 2 +- spec/v2/input/local_response_v2_spec.rb | 2 +- spec/v2/parsing/inference_spec.rb | 4 +- spec/v2/product/crop_spec.rb | 86 +++++++++++++++++++ 19 files changed, 133 insertions(+), 14 deletions(-) create mode 100644 lib/mindee/v2/product.rb rename lib/mindee/v2/product/crop/{Params => params}/crop_parameters.rb (100%) create mode 100644 spec/v2/product/crop_spec.rb diff --git a/lib/mindee.rb b/lib/mindee.rb index f885eacc..d05a9420 100644 --- a/lib/mindee.rb +++ b/lib/mindee.rb @@ -87,6 +87,9 @@ module US # V2-specific module. module V2 + # Product-specific module. + module Product + end end end diff --git a/lib/mindee/geometry/point.rb b/lib/mindee/geometry/point.rb index 18ff2135..3bc98e9a 100644 --- a/lib/mindee/geometry/point.rb +++ b/lib/mindee/geometry/point.rb @@ -31,6 +31,11 @@ def [](key) throw '0 or 1 only' end end + + # @return [String] Point as a string. 
+ def to_s + "(#{@x},#{@y})" + end end end end diff --git a/lib/mindee/geometry/polygon.rb b/lib/mindee/geometry/polygon.rb index 28c7f446..e2e2b1ad 100644 --- a/lib/mindee/geometry/polygon.rb +++ b/lib/mindee/geometry/polygon.rb @@ -27,6 +27,11 @@ def point_in_y?(point) min_max = Geometry.get_min_max_y(self) point.y.between?(min_max.min, min_max.max) end + + # @return [String] Polygon as a string. + def to_s + "(#{map(&:to_s).join(', ')})" + end end end end diff --git a/lib/mindee/parsing/v2/field/field_location.rb b/lib/mindee/parsing/v2/field/field_location.rb index e6902988..d5869b57 100644 --- a/lib/mindee/parsing/v2/field/field_location.rb +++ b/lib/mindee/parsing/v2/field/field_location.rb @@ -24,7 +24,7 @@ def initialize(server_response) # # @return [String] def to_s - @polygon ? @polygon.to_s : '' + "#{polygon} on page #{page}" end end end diff --git a/lib/mindee/v2.rb b/lib/mindee/v2.rb index 8e9b8f90..6950e5f9 100644 --- a/lib/mindee/v2.rb +++ b/lib/mindee/v2.rb @@ -1 +1,4 @@ # frozen_string_literal: true + +require_relative 'parsing' +require_relative 'product' diff --git a/lib/mindee/v2/product.rb b/lib/mindee/v2/product.rb new file mode 100644 index 00000000..730e3696 --- /dev/null +++ b/lib/mindee/v2/product.rb @@ -0,0 +1,4 @@ +# frozen_string_literal: true + +require_relative 'product/crop/crop_response' +require_relative 'product/crop/params/crop_parameters' diff --git a/lib/mindee/v2/product/crop/crop_inference.rb b/lib/mindee/v2/product/crop/crop_inference.rb index 841a1c7d..7739949a 100644 --- a/lib/mindee/v2/product/crop/crop_inference.rb +++ b/lib/mindee/v2/product/crop/crop_inference.rb @@ -1,11 +1,13 @@ # frozen_string_literal: true +require_relative 'crop_result' + module Mindee module V2 module Product module Crop # The inference result for a crop utility request. - class CropInference + class CropInference < Mindee::V2::Parsing::BaseInference # @return [CropResult] Parsed inference payload. 
attr_reader :result @@ -20,8 +22,12 @@ def initialize(server_response) # @return [String] def to_s [ - super, - @result, + 'Inference', + '#########', + @job.to_s, + @model.to_s, + @file.to_s, + result.to_s, '', ].join("\n") end diff --git a/lib/mindee/v2/product/crop/crop_response.rb b/lib/mindee/v2/product/crop/crop_response.rb index 09a809bb..9a6c03e6 100644 --- a/lib/mindee/v2/product/crop/crop_response.rb +++ b/lib/mindee/v2/product/crop/crop_response.rb @@ -1,6 +1,8 @@ # frozen_string_literal: true require_relative '../../parsing/base_response' +require_relative 'params/crop_parameters' +require_relative 'crop_inference' module Mindee module V2 @@ -12,7 +14,7 @@ class CropResponse < Mindee::V2::Parsing::BaseResponse attr_reader :inference @_slug = 'crop' - @_params_type = Params::CropParameters + @_params_type = Crop::Params::CropParameters # @param server_response [Hash] Hash parsed from the API JSON response. def initialize(server_response) diff --git a/lib/mindee/v2/product/crop/crop_result.rb b/lib/mindee/v2/product/crop/crop_result.rb index 8135041c..6f572fbe 100644 --- a/lib/mindee/v2/product/crop/crop_result.rb +++ b/lib/mindee/v2/product/crop/crop_result.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require_relative 'crop_item' + module Mindee module V2 module Product @@ -12,11 +14,9 @@ class CropResult # @param server_response [Hash] Hash representation of the JSON returned by the service. 
def initialize(server_response) @crops = if server_response.key?('crops') - server_response['crop'].map do |crop| + server_response['crops'].map do |crop| CropItem.new(crop) end - else - [] end end diff --git a/lib/mindee/v2/product/crop/Params/crop_parameters.rb b/lib/mindee/v2/product/crop/params/crop_parameters.rb similarity index 100% rename from lib/mindee/v2/product/crop/Params/crop_parameters.rb rename to lib/mindee/v2/product/crop/params/crop_parameters.rb diff --git a/sig/mindee/geometry/point.rbs b/sig/mindee/geometry/point.rbs index 9bac0dcf..f2cc620d 100644 --- a/sig/mindee/geometry/point.rbs +++ b/sig/mindee/geometry/point.rbs @@ -7,6 +7,8 @@ module Mindee def initialize: (Float, Float) -> void def []: (Integer) -> Float + + def to_s: -> String end end end diff --git a/sig/mindee/geometry/polygon.rbs b/sig/mindee/geometry/polygon.rbs index 19f8b7c5..23a42d5d 100644 --- a/sig/mindee/geometry/polygon.rbs +++ b/sig/mindee/geometry/polygon.rbs @@ -5,6 +5,8 @@ module Mindee def initialize: (Array[::Mindee::Geometry::Point | Array[Float]]) -> void def centroid: -> Point def point_in_y?: (Point) -> bool + + def to_s: -> String end end end diff --git a/sig/mindee/v2/product/crop/crop_result.rbs b/sig/mindee/v2/product/crop/crop_result.rbs index 8e06576b..67cd36e9 100644 --- a/sig/mindee/v2/product/crop/crop_result.rbs +++ b/sig/mindee/v2/product/crop/crop_result.rbs @@ -3,7 +3,7 @@ module Mindee module Product module Crop class CropResult - @crops: Array[CropItem] + attr_reader crops: Array[CropItem] def initialize: (Hash[String | Symbol, untyped]) -> void def to_s: -> String diff --git a/spec/data.rb b/spec/data.rb index 5b67c8ca..d2f73eeb 100644 --- a/spec/data.rb +++ b/spec/data.rb @@ -4,6 +4,7 @@ FILE_TYPES_DIR = File.join(ROOT_DATA_DIR, 'file_types').freeze V1_DATA_DIR = File.join(ROOT_DATA_DIR, 'v1').freeze V2_DATA_DIR = File.join(ROOT_DATA_DIR, 'v2').freeze +V2_PRODUCTS_DIR = File.join(V2_DATA_DIR, 'products').freeze V1_ASYNC_DIR = 
File.join(V1_DATA_DIR, 'async').freeze V1_PRODUCT_DATA_DIR = File.join(V1_DATA_DIR, 'products').freeze V1_OCR_DIR = File.join(V1_DATA_DIR, 'extras', 'ocr') diff --git a/spec/v2/client_v2_integration.rb b/spec/v2/client_v2_integration.rb index d5da7f10..722af829 100644 --- a/spec/v2/client_v2_integration.rb +++ b/spec/v2/client_v2_integration.rb @@ -219,7 +219,7 @@ context 'A Data Schema Override' do it 'Overrides successfully' do - data_schema_replace = File.read(File.join(V2_DATA_DIR, 'products', 'extraction', + data_schema_replace = File.read(File.join(V2_PRODUCTS_DIR, 'extraction', 'data_schema_replace_param.json')) input = Mindee::Input::Source::PathInputSource.new(File.join(FILE_TYPES_DIR, 'pdf', 'blank_1.pdf')) diff --git a/spec/v2/input/inference_parameter_spec.rb b/spec/v2/input/inference_parameter_spec.rb index af0ccd6a..6153bd23 100644 --- a/spec/v2/input/inference_parameter_spec.rb +++ b/spec/v2/input/inference_parameter_spec.rb @@ -5,7 +5,7 @@ describe Mindee::Input::InferenceParameters do let(:extracted_schema_content) do - File.read(File.join(V2_DATA_DIR, 'products', 'extraction', 'data_schema_replace_param.json')) + File.read(File.join(V2_PRODUCTS_DIR, 'extraction', 'data_schema_replace_param.json')) end let(:extracted_schema_hash) { JSON.parse(extracted_schema_content) } let(:extracted_schema_str) { extracted_schema_hash.to_json } diff --git a/spec/v2/input/local_response_v2_spec.rb b/spec/v2/input/local_response_v2_spec.rb index f27a5961..85065e77 100644 --- a/spec/v2/input/local_response_v2_spec.rb +++ b/spec/v2/input/local_response_v2_spec.rb @@ -18,7 +18,7 @@ def assert_local_response(local_response) end describe Mindee::Input::LocalResponse do - let(:file_path) { File.join(V2_DATA_DIR, 'products', 'extraction', 'standard_field_types.json') } + let(:file_path) { File.join(V2_PRODUCTS_DIR, 'extraction', 'standard_field_types.json') } context 'A V2 local response' do it 'should load from a path' do response = 
Mindee::Input::LocalResponse.new(file_path) diff --git a/spec/v2/parsing/inference_spec.rb b/spec/v2/parsing/inference_spec.rb index f0c496c6..7986a555 100644 --- a/spec/v2/parsing/inference_spec.rb +++ b/spec/v2/parsing/inference_spec.rb @@ -3,8 +3,8 @@ require 'mindee' RSpec.describe 'inference' do - let(:findoc_path) { File.join(V2_DATA_DIR, 'products', 'extraction', 'financial_document') } - let(:extraction_path) { File.join(V2_DATA_DIR, 'products', 'extraction') } + let(:findoc_path) { File.join(V2_PRODUCTS_DIR, 'extraction', 'financial_document') } + let(:extraction_path) { File.join(V2_PRODUCTS_DIR, 'extraction') } let(:deep_nested_field_path) { File.join(extraction_path, 'deep_nested_fields.json') } let(:standard_field_path) { File.join(extraction_path, 'standard_field_types.json') } let(:standard_field_rst_path) { File.join(extraction_path, 'standard_field_types.rst') } diff --git a/spec/v2/product/crop_spec.rb b/spec/v2/product/crop_spec.rb new file mode 100644 index 00000000..ee38a097 --- /dev/null +++ b/spec/v2/product/crop_spec.rb @@ -0,0 +1,86 @@ +# frozen_string_literal: true + +require 'json' +require 'mindee/v2/product' + +require_relative '../../data' + +describe Mindee::V2::Product::Crop::CropResponse do + let(:crop_data_dir) { File.join(V2_PRODUCTS_DIR, 'crop') } + + it 'parses a single crop properly' do + json_path = File.join(crop_data_dir, 'crop_single.json') + rst_path = File.join(crop_data_dir, 'crop_single.rst') + + json_sample = JSON.parse(File.read(json_path)) + rst_sample = File.read(rst_path) + + response = Mindee::V2::Product::Crop::CropResponse.new(json_sample) + + expect(response.inference).to be_a(Mindee::V2::Product::Crop::CropInference) + expect(response.inference.result.crops).not_to be_empty + + crop = response.inference.result.crops[0] + expect(crop.location.polygon.size).to eq(4) + expect(crop.location.polygon[0][0]).to eq(0.15) + expect(crop.location.polygon[0][1]).to eq(0.254) + expect(crop.location.polygon[1][0]).to 
eq(0.85) + expect(crop.location.polygon[1][1]).to eq(0.254) + expect(crop.location.polygon[2][0]).to eq(0.85) + expect(crop.location.polygon[2][1]).to eq(0.947) + expect(crop.location.polygon[3][0]).to eq(0.15) + expect(crop.location.polygon[3][1]).to eq(0.947) + + expect(crop.location.page).to eq(0) + expect(crop.object_type).to eq('invoice') + + expect(response.to_s).to eq(rst_sample) + end + + it 'parses multiple crops properly' do + json_path = File.join(crop_data_dir, 'crop_multiple.json') + rst_path = File.join(crop_data_dir, 'crop_multiple.rst') + + json_sample = JSON.parse(File.read(json_path)) + rst_sample = File.read(rst_path) + + response = Mindee::V2::Product::Crop::CropResponse.new(json_sample) + + expect(response.inference).to be_a(Mindee::V2::Product::Crop::CropInference) + expect(response.inference.result).to be_a(Mindee::V2::Product::Crop::CropResult) + expect(response.inference.result.crops[0]).to be_a(Mindee::V2::Product::Crop::CropItem) + expect(response.inference.result.crops.size).to eq(2) + + # First Crop assertions + crop_zero = response.inference.result.crops[0] + expect(crop_zero.location.polygon.size).to eq(4) + expect(crop_zero.location.polygon[0][0]).to eq(0.214) + expect(crop_zero.location.polygon[0][1]).to eq(0.079) + expect(crop_zero.location.polygon[1][0]).to eq(0.476) + expect(crop_zero.location.polygon[1][1]).to eq(0.079) + expect(crop_zero.location.polygon[2][0]).to eq(0.476) + expect(crop_zero.location.polygon[2][1]).to eq(0.979) + expect(crop_zero.location.polygon[3][0]).to eq(0.214) + expect(crop_zero.location.polygon[3][1]).to eq(0.979) + + expect(crop_zero.location.page).to eq(0) + expect(crop_zero.object_type).to eq('invoice') + + # Second Crop assertions + crop_one = response.inference.result.crops[1] + expect(crop_one.location.polygon.size).to eq(4) + expect(crop_one.location.polygon[0][0]).to eq(0.547) + expect(crop_one.location.polygon[0][1]).to eq(0.15) + expect(crop_one.location.polygon[1][0]).to eq(0.862) + 
expect(crop_one.location.polygon[1][1]).to eq(0.15) + expect(crop_one.location.polygon[2][0]).to eq(0.862) + expect(crop_one.location.polygon[2][1]).to eq(0.97) + expect(crop_one.location.polygon[3][0]).to eq(0.547) + expect(crop_one.location.polygon[3][1]).to eq(0.97) + + expect(crop_one.location.page).to eq(0) + expect(crop_one.object_type).to eq('invoice') + + expect(response.to_s).to eq(rst_sample) + end +end From 798d30c582a456d694f0c331e4eb89c5582a76a4 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Wed, 25 Feb 2026 13:53:18 +0100 Subject: [PATCH 06/23] add integration test --- .github/workflows/_test-integrations.yml | 3 ++ lib/mindee/http/mindee_api_v2.rb | 1 + lib/mindee/input/base_parameters.rb | 18 +++++++----- lib/mindee/input/inference_parameters.rb | 7 +++-- sig/mindee/input/base_parameters.rbs | 4 +-- sig/mindee/input/inference_parameters.rbs | 2 +- spec/data.rb | 2 +- spec/v2/client_v2_integration.rb | 4 ++- spec/v2/input/inference_parameter_spec.rb | 2 +- spec/v2/input/local_response_v2_spec.rb | 2 +- spec/v2/parsing/inference_spec.rb | 4 +-- spec/v2/product/crop_integration.rb | 36 +++++++++++++++++++++++ spec/v2/product/crop_spec.rb | 2 +- 13 files changed, 66 insertions(+), 21 deletions(-) create mode 100644 spec/v2/product/crop_integration.rb diff --git a/.github/workflows/_test-integrations.yml b/.github/workflows/_test-integrations.yml index 0259f138..2c2fbebc 100644 --- a/.github/workflows/_test-integrations.yml +++ b/.github/workflows/_test-integrations.yml @@ -56,7 +56,10 @@ jobs: MINDEE_V2_API_KEY: ${{ secrets.MINDEE_V2_SE_TESTS_API_KEY }} MINDEE_V2_FINDOC_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }} MINDEE_V2_SE_TESTS_BLANK_PDF_URL: ${{ secrets.MINDEE_V2_SE_TESTS_BLANK_PDF_URL }} + MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID }} + MINDEE_V2_SE_TESTS_CROP_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_CROP_MODEL_ID }} 
MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID }} + MINDEE_V2_SE_TESTS_OCR_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_OCR_MODEL_ID }} MINDEE_LOG_LEVEL: DEBUG run: | bundle exec rake integration diff --git a/lib/mindee/http/mindee_api_v2.rb b/lib/mindee/http/mindee_api_v2.rb index d3b32e63..698018d8 100644 --- a/lib/mindee/http/mindee_api_v2.rb +++ b/lib/mindee/http/mindee_api_v2.rb @@ -172,6 +172,7 @@ def enqueue_form_options(form_data, params) # @return [Net::HTTPResponse, nil] def enqueue(input_source, params) uri = URI("#{@settings.base_url}/products/#{params._slug}/enqueue") + puts "POST #{uri} from #{params.class}" form_data = if input_source.is_a?(Mindee::Input::Source::URLInputSource) [['url', input_source.url]] # : Array[untyped] diff --git a/lib/mindee/input/base_parameters.rb b/lib/mindee/input/base_parameters.rb index 15eaebf5..d28c1e30 100644 --- a/lib/mindee/input/base_parameters.rb +++ b/lib/mindee/input/base_parameters.rb @@ -19,9 +19,6 @@ class BaseParameters # @return [Boolean, nil] Whether to close the file after parsing. attr_reader :close_file - # @return [String] Slug for the endpoint. - @_slug = '' - # @param [String] model_id ID of the model # @param [String, nil] file_alias File alias, if applicable. # @param [Array, nil] webhook_ids List of webhook IDs to propagate the API response to. @@ -43,6 +40,15 @@ def initialize( @close_file = close_file.nil? || close_file end + # @return [String] Slug for the endpoint. + def self._slug + if self == BaseParameters + raise NotImplementedError, 'Cannot access `_slug` directly on the BaseParameters class.' + end + + '' + end + def self.from_hash(params: {}) load_from_hash(params: params) new( @@ -88,11 +94,7 @@ def append_form_data(form_data) # @return [String] Slug for the endpoint. def _slug - if self == BaseParameters - raise NotImplementedError, 'Cannot access `slug` directly on the BaseParameters class. Please use a subclass.' 
- end - - @_slug + self.class._slug end # Validates the parameters for async auto-polling diff --git a/lib/mindee/input/inference_parameters.rb b/lib/mindee/input/inference_parameters.rb index 060d015d..f2390831 100644 --- a/lib/mindee/input/inference_parameters.rb +++ b/lib/mindee/input/inference_parameters.rb @@ -29,7 +29,10 @@ class InferenceParameters < Mindee::Input::BaseParameters # @return [DataSchemaField] attr_reader :data_schema - @_slug = 'extraction' + # @return [String] Slug for the endpoint. + def self._slug + 'extraction' + end # rubocop:disable Metrics/ParameterLists # @param [String] model_id ID of the model @@ -89,7 +92,7 @@ def append_form_data(form_data) new_form_data end - # Loads the parameters from a Hash. + # Loads a prediction from a Hash. # @param [Hash] params Parameters to provide as a hash. # @return [InferenceParameters] def self.from_hash(params: {}) diff --git a/sig/mindee/input/base_parameters.rbs b/sig/mindee/input/base_parameters.rbs index ef9586b1..428d9c1f 100644 --- a/sig/mindee/input/base_parameters.rbs +++ b/sig/mindee/input/base_parameters.rbs @@ -3,7 +3,7 @@ module Mindee module Input class BaseParameters - self.@_slug: String + attr_reader self._slug: String def self.from_hash: (params: Hash[String | Symbol, untyped]) -> instance def self.load_from_hash: (params: Hash[String | Symbol, untyped]) -> Hash[String | Symbol, untyped] @@ -15,7 +15,6 @@ module Mindee attr_reader webhook_ids: Array[String]? 
attr_reader polling_options: PollingOptions attr_reader close_file: bool - self.@slug: String def initialize: ( String, @@ -26,7 +25,6 @@ module Mindee ) -> void def append_form_data: (Array[Array[untyped]]) -> Array[Array[untyped]] - def validate_async_params: () -> void private diff --git a/sig/mindee/input/inference_parameters.rbs b/sig/mindee/input/inference_parameters.rbs index 6d0f1404..de75acee 100644 --- a/sig/mindee/input/inference_parameters.rbs +++ b/sig/mindee/input/inference_parameters.rbs @@ -2,7 +2,7 @@ module Mindee module Input class InferenceParameters < BaseParameters - self.@_slug: String + def self._slug: -> String attr_reader confidence: bool? attr_reader polygon: bool? diff --git a/spec/data.rb b/spec/data.rb index d2f73eeb..e8fcf111 100644 --- a/spec/data.rb +++ b/spec/data.rb @@ -4,7 +4,7 @@ FILE_TYPES_DIR = File.join(ROOT_DATA_DIR, 'file_types').freeze V1_DATA_DIR = File.join(ROOT_DATA_DIR, 'v1').freeze V2_DATA_DIR = File.join(ROOT_DATA_DIR, 'v2').freeze -V2_PRODUCTS_DIR = File.join(V2_DATA_DIR, 'products').freeze +V2_PRODUCTS_DATA_DIR = File.join(V2_DATA_DIR, 'products').freeze V1_ASYNC_DIR = File.join(V1_DATA_DIR, 'async').freeze V1_PRODUCT_DATA_DIR = File.join(V1_DATA_DIR, 'products').freeze V1_OCR_DIR = File.join(V1_DATA_DIR, 'extras', 'ocr') diff --git a/spec/v2/client_v2_integration.rb b/spec/v2/client_v2_integration.rb index 722af829..0b54b1be 100644 --- a/spec/v2/client_v2_integration.rb +++ b/spec/v2/client_v2_integration.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require 'mindee' + describe 'Mindee::ClientV2 – integration tests (V2)', :integration, order: :defined do let(:api_key) { ENV.fetch('MINDEE_V2_API_KEY') } let(:model_id) { ENV.fetch('MINDEE_V2_FINDOC_MODEL_ID') } @@ -219,7 +221,7 @@ context 'A Data Schema Override' do it 'Overrides successfully' do - data_schema_replace = File.read(File.join(V2_PRODUCTS_DIR, 'extraction', + data_schema_replace = File.read(File.join(V2_PRODUCTS_DATA_DIR, 'extraction', 
'data_schema_replace_param.json')) input = Mindee::Input::Source::PathInputSource.new(File.join(FILE_TYPES_DIR, 'pdf', 'blank_1.pdf')) diff --git a/spec/v2/input/inference_parameter_spec.rb b/spec/v2/input/inference_parameter_spec.rb index 6153bd23..3f3a1c7f 100644 --- a/spec/v2/input/inference_parameter_spec.rb +++ b/spec/v2/input/inference_parameter_spec.rb @@ -5,7 +5,7 @@ describe Mindee::Input::InferenceParameters do let(:extracted_schema_content) do - File.read(File.join(V2_PRODUCTS_DIR, 'extraction', 'data_schema_replace_param.json')) + File.read(File.join(V2_PRODUCTS_DATA_DIR, 'extraction', 'data_schema_replace_param.json')) end let(:extracted_schema_hash) { JSON.parse(extracted_schema_content) } let(:extracted_schema_str) { extracted_schema_hash.to_json } diff --git a/spec/v2/input/local_response_v2_spec.rb b/spec/v2/input/local_response_v2_spec.rb index 85065e77..a863b7d1 100644 --- a/spec/v2/input/local_response_v2_spec.rb +++ b/spec/v2/input/local_response_v2_spec.rb @@ -18,7 +18,7 @@ def assert_local_response(local_response) end describe Mindee::Input::LocalResponse do - let(:file_path) { File.join(V2_PRODUCTS_DIR, 'extraction', 'standard_field_types.json') } + let(:file_path) { File.join(V2_PRODUCTS_DATA_DIR, 'extraction', 'standard_field_types.json') } context 'A V2 local response' do it 'should load from a path' do response = Mindee::Input::LocalResponse.new(file_path) diff --git a/spec/v2/parsing/inference_spec.rb b/spec/v2/parsing/inference_spec.rb index 7986a555..0215f094 100644 --- a/spec/v2/parsing/inference_spec.rb +++ b/spec/v2/parsing/inference_spec.rb @@ -3,8 +3,8 @@ require 'mindee' RSpec.describe 'inference' do - let(:findoc_path) { File.join(V2_PRODUCTS_DIR, 'extraction', 'financial_document') } - let(:extraction_path) { File.join(V2_PRODUCTS_DIR, 'extraction') } + let(:findoc_path) { File.join(V2_PRODUCTS_DATA_DIR, 'extraction', 'financial_document') } + let(:extraction_path) { File.join(V2_PRODUCTS_DATA_DIR, 'extraction') } 
let(:deep_nested_field_path) { File.join(extraction_path, 'deep_nested_fields.json') } let(:standard_field_path) { File.join(extraction_path, 'standard_field_types.json') } let(:standard_field_rst_path) { File.join(extraction_path, 'standard_field_types.rst') } diff --git a/spec/v2/product/crop_integration.rb b/spec/v2/product/crop_integration.rb new file mode 100644 index 00000000..255424a2 --- /dev/null +++ b/spec/v2/product/crop_integration.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +require 'mindee' +require 'mindee/v2/product' + +require_relative '../../data' + +describe Mindee::ClientV2, :integration, :v2 do + let(:crop_model_id) do + ENV.fetch('MINDEE_V2_SE_TESTS_CROP_MODEL_ID', nil) + end + + let(:v2_client) do + Mindee::ClientV2.new + end + + it 'processes crop default sample correctly' do + + input_source = Mindee::Input::Source::PathInputSource.new( + File.join(V2_PRODUCT_DATA_DIR, 'crop', 'default_sample.jpg') + ) + + params = Mindee::Input::InferenceParameters.new(crop_model_id) + + response = v2_client.enqueue_and_get_result( + Mindee::V2::Product::Crop::CropResponse, + input_source, + params + ) + + expect(response.inference).not_to be_nil + expect(response.inference.file.name).to eq('default_sample.jpg') + expect(response.inference.result.crops).not_to be_empty + expect(response.inference.result.crops.size).to eq(2) + end +end diff --git a/spec/v2/product/crop_spec.rb b/spec/v2/product/crop_spec.rb index ee38a097..27c3cce2 100644 --- a/spec/v2/product/crop_spec.rb +++ b/spec/v2/product/crop_spec.rb @@ -6,7 +6,7 @@ require_relative '../../data' describe Mindee::V2::Product::Crop::CropResponse do - let(:crop_data_dir) { File.join(V2_PRODUCTS_DIR, 'crop') } + let(:crop_data_dir) { File.join(V2_PRODUCTS_DATA_DIR, 'crop') } it 'parses a single crop properly' do json_path = File.join(crop_data_dir, 'crop_single.json') From 1b1f845a2a5a6299e83f3d8c59164575d59f69e2 Mon Sep 17 00:00:00 2001 From: sebastianMindee 
<130448732+sebastianMindee@users.noreply.github.com> Date: Wed, 25 Feb 2026 16:33:11 +0100 Subject: [PATCH 07/23] fix lint --- spec/v2/product/crop_integration.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/spec/v2/product/crop_integration.rb b/spec/v2/product/crop_integration.rb index 255424a2..a5403eb9 100644 --- a/spec/v2/product/crop_integration.rb +++ b/spec/v2/product/crop_integration.rb @@ -15,7 +15,6 @@ end it 'processes crop default sample correctly' do - input_source = Mindee::Input::Source::PathInputSource.new( File.join(V2_PRODUCT_DATA_DIR, 'crop', 'default_sample.jpg') ) From 8ca174911ed2d41a71779ba8a297331b606b91e6 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Wed, 25 Feb 2026 17:09:18 +0100 Subject: [PATCH 08/23] :sparkles: add support for classification --- lib/mindee/parsing/v2/inference.rb | 6 +-- lib/mindee/v2/parsing.rb | 1 - lib/mindee/v2/parsing/base_inference.rb | 12 +++++ lib/mindee/v2/product.rb | 2 + .../classification_classifier.rb | 25 +++++++++++ .../classification_inference.rb | 34 ++++++++++++++ .../classification/classification_response.rb | 35 +++++++++++++++ .../classification/classification_result.rb | 27 +++++++++++ .../params/classification_parameters.rb | 45 +++++++++++++++++++ lib/mindee/v2/product/crop/crop_inference.rb | 8 +--- .../classification_classifier.rbs | 15 +++++++ .../classification_inference.rbs | 15 +++++++ .../classification/classification_result.rbs | 15 +++++++ sig/mindee/v2/parsing/base_inference.rbs | 2 + .../classification_response.rbs | 23 ++++++++++ .../classification_parameters.rbs | 25 +++++++++++ spec/data.rb | 2 +- spec/v2/client_v2_integration.rb | 2 +- spec/v2/input/inference_parameter_spec.rb | 2 +- spec/v2/input/local_response_v2_spec.rb | 2 +- spec/v2/parsing/inference_spec.rb | 4 +- .../classification_integration.rb | 32 +++++++++++++ .../classification/classification_spec.rb | 21 +++++++++ .../v2/product/{ => 
crop}/crop_integration.rb | 2 +- spec/v2/product/{ => crop}/crop_spec.rb | 4 +- 25 files changed, 340 insertions(+), 21 deletions(-) delete mode 100644 lib/mindee/v2/parsing.rb create mode 100644 lib/mindee/v2/product/classification/classification_classifier.rb create mode 100644 lib/mindee/v2/product/classification/classification_inference.rb create mode 100644 lib/mindee/v2/product/classification/classification_response.rb create mode 100644 lib/mindee/v2/product/classification/classification_result.rb create mode 100644 lib/mindee/v2/product/classification/params/classification_parameters.rb create mode 100644 sig/mindee/product/classification/classification_classifier.rbs create mode 100644 sig/mindee/product/classification/classification_inference.rbs create mode 100644 sig/mindee/product/classification/classification_result.rbs create mode 100644 sig/mindee/v2/product/classification/classification_response.rbs create mode 100644 sig/mindee/v2/product/classification/params/classification_parameters/classification_parameters.rbs create mode 100644 spec/v2/product/classification/classification_integration.rb create mode 100644 spec/v2/product/classification/classification_spec.rb rename spec/v2/product/{ => crop}/crop_integration.rb (96%) rename spec/v2/product/{ => crop}/crop_spec.rb (97%) diff --git a/lib/mindee/parsing/v2/inference.rb b/lib/mindee/parsing/v2/inference.rb index 495134bd..2c00e590 100644 --- a/lib/mindee/parsing/v2/inference.rb +++ b/lib/mindee/parsing/v2/inference.rb @@ -28,11 +28,7 @@ def initialize(server_response) # @return [String] def to_s [ - 'Inference', - '#########', - @job.to_s, - @model.to_s, - @file.to_s, + super, @active_options.to_s, @result.to_s, '', diff --git a/lib/mindee/v2/parsing.rb b/lib/mindee/v2/parsing.rb deleted file mode 100644 index 8e9b8f90..00000000 --- a/lib/mindee/v2/parsing.rb +++ /dev/null @@ -1 +0,0 @@ -# frozen_string_literal: true diff --git a/lib/mindee/v2/parsing/base_inference.rb 
b/lib/mindee/v2/parsing/base_inference.rb index 0a9daebb..da2a867e 100644 --- a/lib/mindee/v2/parsing/base_inference.rb +++ b/lib/mindee/v2/parsing/base_inference.rb @@ -22,6 +22,18 @@ def initialize(http_response) @id = http_response['id'] @job = Mindee::Parsing::V2::InferenceJob.new(http_response['job']) if http_response.key?('job') end + + # String representation. + # @return [String] + def to_s + [ + 'Inference', + '#########', + @job.to_s, + @model.to_s, + @file.to_s, + ].join("\n") + end end end end diff --git a/lib/mindee/v2/product.rb b/lib/mindee/v2/product.rb index 730e3696..27eee97f 100644 --- a/lib/mindee/v2/product.rb +++ b/lib/mindee/v2/product.rb @@ -1,4 +1,6 @@ # frozen_string_literal: true +require_relative 'product/classification/classification_response' +require_relative 'product/classification/params/classification_parameters' require_relative 'product/crop/crop_response' require_relative 'product/crop/params/crop_parameters' diff --git a/lib/mindee/v2/product/classification/classification_classifier.rb b/lib/mindee/v2/product/classification/classification_classifier.rb new file mode 100644 index 00000000..288ee311 --- /dev/null +++ b/lib/mindee/v2/product/classification/classification_classifier.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +module Mindee + module V2 + module Product + module Classification + # Classification of document type from the source file. + class ClassificationClassifier + # @return [String] The document type, as identified on given classification values. + attr_reader :document_type + + # @param server_response [Hash] Hash representation of the JSON returned by the service. + def initialize(server_response) + @document_type = server_response['document_type'] + end + + # @return [String] String representation. 
+ def to_s + "Document Type: #{@document_type}" + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/classification/classification_inference.rb b/lib/mindee/v2/product/classification/classification_inference.rb new file mode 100644 index 00000000..837c19c2 --- /dev/null +++ b/lib/mindee/v2/product/classification/classification_inference.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +require_relative 'classification_result' + +module Mindee + module V2 + module Product + module Classification + # The inference result for a classification utility request. + class ClassificationInference < Mindee::V2::Parsing::BaseInference + # @return [ClassificationResult] Parsed inference payload. + attr_reader :result + + # @param server_response [Hash] Hash representation of the JSON returned by the service. + def initialize(server_response) + super + + @result = ClassificationResult.new(server_response['result']) + end + + # String representation. + # @return [String] + def to_s + [ + super, + @result.to_s, + '', + ].join("\n") + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/classification/classification_response.rb b/lib/mindee/v2/product/classification/classification_response.rb new file mode 100644 index 00000000..f07271e1 --- /dev/null +++ b/lib/mindee/v2/product/classification/classification_response.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +require_relative '../../parsing/base_response' +require_relative 'classification_inference' +require_relative 'params/classification_parameters' + +module Mindee + module V2 + module Product + module Classification + # HTTP response wrapper that embeds a V2 Inference. + class ClassificationResponse < Mindee::V2::Parsing::BaseResponse + # @return [ClassificationInference] Parsed inference payload. 
+ attr_reader :inference + + @_slug = 'classification' + @_params_type = Classification::Params::ClassificationParameters + + # @param server_response [Hash] Hash parsed from the API JSON response. + def initialize(server_response) + super + + @inference = ClassificationInference.new(server_response['inference']) + end + + # String representation. + # @return [String] + def to_s + @inference.to_s + end + end + end + end + end +end \ No newline at end of file diff --git a/lib/mindee/v2/product/classification/classification_result.rb b/lib/mindee/v2/product/classification/classification_result.rb new file mode 100644 index 00000000..a994b089 --- /dev/null +++ b/lib/mindee/v2/product/classification/classification_result.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +require_relative 'classification_classifier' + +module Mindee + module V2 + module Product + module Classification + # Result of the document classifier inference. + class ClassificationResult + # @return [ClassificationClassifier] The document type, as identified on given classification values. + attr_reader :classification + + # @param server_response [Hash] Hash representation of the JSON returned by the service. + def initialize(server_response) + @classification = ClassificationClassifier.new(server_response['classification']) + end + + # @return [String] String representation. 
+ def to_s + "Classification\n==============\n#{@classification}" + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/classification/params/classification_parameters.rb b/lib/mindee/v2/product/classification/params/classification_parameters.rb new file mode 100644 index 00000000..4b2b1146 --- /dev/null +++ b/lib/mindee/v2/product/classification/params/classification_parameters.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +module Mindee + module V2 + module Product + module Classification + module Params + class ClassificationParameters < Input::BaseParameters + # Parameters accepted by the classification utility v2 endpoint. + class ClassificationParameters < Mindee::Input::BaseParameters + @_slug = 'classification' + + # @param [String] model_id ID of the model + # @param [String, nil] file_alias File alias, if applicable. + # @param [Array, nil] webhook_ids List of webhook IDs to propagate the API response to. + # @param [Hash, nil] polling_options Options for polling. Set only if having timeout issues. + # @param [Boolean, nil] close_file Whether to close the file after parsing. + def initialize( + model_id, + file_alias: nil, + webhook_ids: nil, + polling_options: nil, + close_file: true + ) + super + end + + # Loads the parameters from a Hash. + # @param [Hash] params Parameters to provide as a hash. 
+ # @return [ClassificationParameters] + def self.from_hash(params: {}) + ClassificationParameters.new( + params.fetch(:model_id), + file_alias: params.fetch(:file_alias, nil), + webhook_ids: params.fetch(:webhook_ids, nil), + close_file: params.fetch(:close_file, true) + ) + end + end + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/crop/crop_inference.rb b/lib/mindee/v2/product/crop/crop_inference.rb index 7739949a..acc7a3fe 100644 --- a/lib/mindee/v2/product/crop/crop_inference.rb +++ b/lib/mindee/v2/product/crop/crop_inference.rb @@ -22,12 +22,8 @@ def initialize(server_response) # @return [String] def to_s [ - 'Inference', - '#########', - @job.to_s, - @model.to_s, - @file.to_s, - result.to_s, + super, + @result.to_s, '', ].join("\n") end diff --git a/sig/mindee/product/classification/classification_classifier.rbs b/sig/mindee/product/classification/classification_classifier.rbs new file mode 100644 index 00000000..4a1aafe5 --- /dev/null +++ b/sig/mindee/product/classification/classification_classifier.rbs @@ -0,0 +1,15 @@ +# lib/mindee/v2/product/classification/classification_classifier.rb + +module Mindee + module V2 + module Product + module Classification + class ClassificationClassifier + attr_reader document_type: String + + def initialize: (Hash[String | Symbol, untyped]) -> void + end + end + end + end +end diff --git a/sig/mindee/product/classification/classification_inference.rbs b/sig/mindee/product/classification/classification_inference.rbs new file mode 100644 index 00000000..af1ab163 --- /dev/null +++ b/sig/mindee/product/classification/classification_inference.rbs @@ -0,0 +1,15 @@ +module Mindee + module V2 + module Product + module Classification + class ClassificationInference + attr_reader result: ClassificationResult + + def initialize: (Hash[String | Symbol, untyped]) -> void + + def to_s: -> String + end + end + end + end +end diff --git a/sig/mindee/product/classification/classification_result.rbs 
b/sig/mindee/product/classification/classification_result.rbs new file mode 100644 index 00000000..54dbeec9 --- /dev/null +++ b/sig/mindee/product/classification/classification_result.rbs @@ -0,0 +1,15 @@ +module Mindee + module V2 + module Product + module Classification + class ClassificationResult + attr_reader classification: ClassificationClassifier + + def initialize: (Hash[String | Symbol, untyped]) -> void + + def to_s: -> String + end + end + end + end +end diff --git a/sig/mindee/v2/parsing/base_inference.rbs b/sig/mindee/v2/parsing/base_inference.rbs index 4c2feb91..4f6b0396 100644 --- a/sig/mindee/v2/parsing/base_inference.rbs +++ b/sig/mindee/v2/parsing/base_inference.rbs @@ -10,6 +10,8 @@ module Mindee attr_reader model: Mindee::Parsing::V2::InferenceModel def initialize: (Hash[String | Symbol, untyped]) -> void + + def to_s: -> String end end end diff --git a/sig/mindee/v2/product/classification/classification_response.rbs b/sig/mindee/v2/product/classification/classification_response.rbs new file mode 100644 index 00000000..f2b7b755 --- /dev/null +++ b/sig/mindee/v2/product/classification/classification_response.rbs @@ -0,0 +1,23 @@ +# lib/mindee/v2/product/classification/classification_response.rb + +module Mindee + module V2 + module Product + module Classification + class ClassificationResponse + self.@_slug: String + self.@_params_type: singleton(Params::ClassificationParameters) + + attr_reader inference: Mindee::V2::Product::Classification::ClassificationInference + def initialize: (Hash[String | Symbol, untyped]) -> void + + def _params_type: -> singleton(Params::ClassificationParameters) + + def to_s: -> String + def self._params_type: () -> singleton(Params::ClassificationParameters) + def self._slug: () -> String + end + end + end + end +end diff --git a/sig/mindee/v2/product/classification/params/classification_parameters/classification_parameters.rbs 
b/sig/mindee/v2/product/classification/params/classification_parameters/classification_parameters.rbs new file mode 100644 index 00000000..662a52a8 --- /dev/null +++ b/sig/mindee/v2/product/classification/params/classification_parameters/classification_parameters.rbs @@ -0,0 +1,25 @@ +module Mindee + module V2 + module Product + module Classification + module Params + module ClassificationParameters + class ClassificationParameters + self.@_slug: String + + def self.from_hash: (params: Hash[String | Symbol, untyped]) -> ClassificationParameters + + def initialize: ( + String, + ?file_alias: String?, + ?webhook_ids: Array[String]?, + ?polling_options: Hash[Symbol | String, untyped] | Input::PollingOptions?, + ?close_file: bool? + ) -> void + end + end + end + end + end + end +end diff --git a/spec/data.rb b/spec/data.rb index e8fcf111..3550b5de 100644 --- a/spec/data.rb +++ b/spec/data.rb @@ -4,7 +4,7 @@ FILE_TYPES_DIR = File.join(ROOT_DATA_DIR, 'file_types').freeze V1_DATA_DIR = File.join(ROOT_DATA_DIR, 'v1').freeze V2_DATA_DIR = File.join(ROOT_DATA_DIR, 'v2').freeze -V2_PRODUCTS_DATA_DIR = File.join(V2_DATA_DIR, 'products').freeze +V2_PRODUCT_DATA_DIR = File.join(V2_DATA_DIR, 'products').freeze V1_ASYNC_DIR = File.join(V1_DATA_DIR, 'async').freeze V1_PRODUCT_DATA_DIR = File.join(V1_DATA_DIR, 'products').freeze V1_OCR_DIR = File.join(V1_DATA_DIR, 'extras', 'ocr') diff --git a/spec/v2/client_v2_integration.rb b/spec/v2/client_v2_integration.rb index 0b54b1be..598a72b9 100644 --- a/spec/v2/client_v2_integration.rb +++ b/spec/v2/client_v2_integration.rb @@ -221,7 +221,7 @@ context 'A Data Schema Override' do it 'Overrides successfully' do - data_schema_replace = File.read(File.join(V2_PRODUCTS_DATA_DIR, 'extraction', + data_schema_replace = File.read(File.join(V2_PRODUCT_DATA_DIR, 'extraction', 'data_schema_replace_param.json')) input = Mindee::Input::Source::PathInputSource.new(File.join(FILE_TYPES_DIR, 'pdf', 'blank_1.pdf')) diff --git 
a/spec/v2/input/inference_parameter_spec.rb b/spec/v2/input/inference_parameter_spec.rb index 3f3a1c7f..bd4ae299 100644 --- a/spec/v2/input/inference_parameter_spec.rb +++ b/spec/v2/input/inference_parameter_spec.rb @@ -5,7 +5,7 @@ describe Mindee::Input::InferenceParameters do let(:extracted_schema_content) do - File.read(File.join(V2_PRODUCTS_DATA_DIR, 'extraction', 'data_schema_replace_param.json')) + File.read(File.join(V2_PRODUCT_DATA_DIR, 'extraction', 'data_schema_replace_param.json')) end let(:extracted_schema_hash) { JSON.parse(extracted_schema_content) } let(:extracted_schema_str) { extracted_schema_hash.to_json } diff --git a/spec/v2/input/local_response_v2_spec.rb b/spec/v2/input/local_response_v2_spec.rb index a863b7d1..ee5dd549 100644 --- a/spec/v2/input/local_response_v2_spec.rb +++ b/spec/v2/input/local_response_v2_spec.rb @@ -18,7 +18,7 @@ def assert_local_response(local_response) end describe Mindee::Input::LocalResponse do - let(:file_path) { File.join(V2_PRODUCTS_DATA_DIR, 'extraction', 'standard_field_types.json') } + let(:file_path) { File.join(V2_PRODUCT_DATA_DIR, 'extraction', 'standard_field_types.json') } context 'A V2 local response' do it 'should load from a path' do response = Mindee::Input::LocalResponse.new(file_path) diff --git a/spec/v2/parsing/inference_spec.rb b/spec/v2/parsing/inference_spec.rb index 0215f094..f5beee4a 100644 --- a/spec/v2/parsing/inference_spec.rb +++ b/spec/v2/parsing/inference_spec.rb @@ -3,8 +3,8 @@ require 'mindee' RSpec.describe 'inference' do - let(:findoc_path) { File.join(V2_PRODUCTS_DATA_DIR, 'extraction', 'financial_document') } - let(:extraction_path) { File.join(V2_PRODUCTS_DATA_DIR, 'extraction') } + let(:findoc_path) { File.join(V2_PRODUCT_DATA_DIR, 'extraction', 'financial_document') } + let(:extraction_path) { File.join(V2_PRODUCT_DATA_DIR, 'extraction') } let(:deep_nested_field_path) { File.join(extraction_path, 'deep_nested_fields.json') } let(:standard_field_path) { File.join(extraction_path, 
'standard_field_types.json') } let(:standard_field_rst_path) { File.join(extraction_path, 'standard_field_types.rst') } diff --git a/spec/v2/product/classification/classification_integration.rb b/spec/v2/product/classification/classification_integration.rb new file mode 100644 index 00000000..affff68d --- /dev/null +++ b/spec/v2/product/classification/classification_integration.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +require 'mindee' + +RSpec.describe Mindee::ClientV2, :integration, :v2 do + let(:classification_model_id) do + ENV.fetch('MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID', nil) + end + + let(:v2_client) do + Mindee::ClientV2.new + end + + it 'processes classification default sample correctly' do + input_source = Mindee::Input::Source::PathInputSource.new( + File.join(V2_PRODUCT_DATA_DIR, 'classification', 'default_invoice.jpg') + ) + + params = Mindee::Input::InferenceParameters.new(classification_model_id) + + response = v2_client.enqueue_and_get_result( + Mindee::V2::Product::Classification::ClassificationResponse, + input_source, + params + ) + + expect(response.inference).not_to be_nil + expect(response.inference.file.name).to eq('default_invoice.jpg') + expect(response.inference.result.classification).not_to be_nil + expect(response.inference.result.classification.document_type).to eq('invoice') + end +end diff --git a/spec/v2/product/classification/classification_spec.rb b/spec/v2/product/classification/classification_spec.rb new file mode 100644 index 00000000..3469a1eb --- /dev/null +++ b/spec/v2/product/classification/classification_spec.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +require 'json' +require 'mindee/v2/product' + +RSpec.describe Mindee::V2::Product::Classification::ClassificationResponse, :v2 do + let(:classification_data_dir) { File.join(V2_PRODUCT_DATA_DIR, 'classification') } + + it 'parses a single classification properly' do + json_path = File.join(classification_data_dir, 'classification_single.json') + 
json_sample = JSON.parse(File.read(json_path)) + + response = Mindee::V2::Product::Classification::ClassificationResponse.new(json_sample) + + expect(response.inference).to be_a(Mindee::V2::Product::Classification::ClassificationInference) + expect(response.inference.result).to be_a(Mindee::V2::Product::Classification::ClassificationResult) + expect(response.inference.result.classification).to be_a(Mindee::V2::Product::Classification::ClassificationClassifier) + + expect(response.inference.result.classification.document_type).to eq('invoice') + end +end diff --git a/spec/v2/product/crop_integration.rb b/spec/v2/product/crop/crop_integration.rb similarity index 96% rename from spec/v2/product/crop_integration.rb rename to spec/v2/product/crop/crop_integration.rb index a5403eb9..cf386004 100644 --- a/spec/v2/product/crop_integration.rb +++ b/spec/v2/product/crop/crop_integration.rb @@ -3,7 +3,7 @@ require 'mindee' require 'mindee/v2/product' -require_relative '../../data' +require_relative '../../../data' describe Mindee::ClientV2, :integration, :v2 do let(:crop_model_id) do diff --git a/spec/v2/product/crop_spec.rb b/spec/v2/product/crop/crop_spec.rb similarity index 97% rename from spec/v2/product/crop_spec.rb rename to spec/v2/product/crop/crop_spec.rb index 27c3cce2..c6a6ce53 100644 --- a/spec/v2/product/crop_spec.rb +++ b/spec/v2/product/crop/crop_spec.rb @@ -3,10 +3,10 @@ require 'json' require 'mindee/v2/product' -require_relative '../../data' +require_relative '../../../data' describe Mindee::V2::Product::Crop::CropResponse do - let(:crop_data_dir) { File.join(V2_PRODUCTS_DATA_DIR, 'crop') } + let(:crop_data_dir) { File.join(V2_PRODUCT_DATA_DIR, 'crop') } it 'parses a single crop properly' do json_path = File.join(crop_data_dir, 'crop_single.json') From 331daae506fd124886fc4e14f6e12234fa1d82ef Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Wed, 25 Feb 2026 17:30:18 +0100 Subject: [PATCH 09/23] 
:sparkles: add support for OCR --- lib/mindee/v2/product.rb | 2 + .../classification/classification_response.rb | 2 +- lib/mindee/v2/product/ocr/ocr_inference.rb | 34 +++++++ lib/mindee/v2/product/ocr/ocr_page.rb | 33 +++++++ lib/mindee/v2/product/ocr/ocr_response.rb | 35 +++++++ lib/mindee/v2/product/ocr/ocr_result.rb | 34 +++++++ lib/mindee/v2/product/ocr/ocr_word.rb | 29 ++++++ .../v2/product/ocr/params/ocr_parameters.rb | 45 +++++++++ sig/mindee/v2/product/ocr/ocr_inference.rbs | 14 +++ sig/mindee/v2/product/ocr/ocr_page.rbs | 15 +++ sig/mindee/v2/product/ocr/ocr_response.rbs | 23 +++++ sig/mindee/v2/product/ocr/ocr_result.rbs | 14 +++ sig/mindee/v2/product/ocr/ocr_word.rbs | 15 +++ .../params/ocr_parameters/ocr_parameters.rbs | 25 +++++ spec/v2/client_v2_spec.rb | 2 +- spec/v2/parsing/inference_spec.rb | 2 +- spec/v2/parsing/job_webhook_spec.rb | 2 +- .../classification_integration.rb | 2 +- .../classification/classification_spec.rb | 6 +- spec/v2/product/ocr/ocr_integration.rb | 34 +++++++ spec/v2/product/ocr/ocr_spec.rb | 96 +++++++++++++++++++ 21 files changed, 457 insertions(+), 7 deletions(-) create mode 100644 lib/mindee/v2/product/ocr/ocr_inference.rb create mode 100644 lib/mindee/v2/product/ocr/ocr_page.rb create mode 100644 lib/mindee/v2/product/ocr/ocr_response.rb create mode 100644 lib/mindee/v2/product/ocr/ocr_result.rb create mode 100644 lib/mindee/v2/product/ocr/ocr_word.rb create mode 100644 lib/mindee/v2/product/ocr/params/ocr_parameters.rb create mode 100644 sig/mindee/v2/product/ocr/ocr_inference.rbs create mode 100644 sig/mindee/v2/product/ocr/ocr_page.rbs create mode 100644 sig/mindee/v2/product/ocr/ocr_response.rbs create mode 100644 sig/mindee/v2/product/ocr/ocr_result.rbs create mode 100644 sig/mindee/v2/product/ocr/ocr_word.rbs create mode 100644 sig/mindee/v2/product/ocr/params/ocr_parameters/ocr_parameters.rbs create mode 100644 spec/v2/product/ocr/ocr_integration.rb create mode 100644 spec/v2/product/ocr/ocr_spec.rb diff --git 
a/lib/mindee/v2/product.rb b/lib/mindee/v2/product.rb index 27eee97f..5a25ab4b 100644 --- a/lib/mindee/v2/product.rb +++ b/lib/mindee/v2/product.rb @@ -4,3 +4,5 @@ require_relative 'product/classification/params/classification_parameters' require_relative 'product/crop/crop_response' require_relative 'product/crop/params/crop_parameters' +require_relative 'product/ocr/ocr_response' +require_relative 'product/ocr/params/ocr_parameters' diff --git a/lib/mindee/v2/product/classification/classification_response.rb b/lib/mindee/v2/product/classification/classification_response.rb index f07271e1..24b8b4a1 100644 --- a/lib/mindee/v2/product/classification/classification_response.rb +++ b/lib/mindee/v2/product/classification/classification_response.rb @@ -32,4 +32,4 @@ def to_s end end end -end \ No newline at end of file +end diff --git a/lib/mindee/v2/product/ocr/ocr_inference.rb b/lib/mindee/v2/product/ocr/ocr_inference.rb new file mode 100644 index 00000000..5daeb062 --- /dev/null +++ b/lib/mindee/v2/product/ocr/ocr_inference.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +require_relative 'ocr_result' + +module Mindee + module V2 + module Product + module Ocr + # The inference result for an OCR utility request. + class OcrInference < Mindee::V2::Parsing::BaseInference + # @return [OcrResult] Parsed inference payload. + attr_reader :result + + # @param server_response [Hash] Hash representation of the JSON returned by the service. + def initialize(server_response) + super + + @result = OcrResult.new(server_response['result']) + end + + # String representation. 
+ # @return [String] + def to_s + [ + super, + @result.to_s, + '', + ].join("\n") + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/ocr/ocr_page.rb b/lib/mindee/v2/product/ocr/ocr_page.rb new file mode 100644 index 00000000..5de39688 --- /dev/null +++ b/lib/mindee/v2/product/ocr/ocr_page.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +require_relative 'ocr_word' + +module Mindee + module V2 + module Product + module Ocr + # OCR result for a single page. + class OcrPage + # @return [Array] List of words extracted from the document page. + attr_reader :words + # @return [String] Full text content extracted from the document page. + attr_reader :content + + # @param server_response [Hash] Hash representation of the JSON returned by the service. + def initialize(server_response) + @words = server_response['words'].map { |word| OcrWord.new(word) } + @content = server_response['content'] + end + + # String representation. + # @return [String] + def to_s + ocr_words = "\n" + ocr_words += @words.map(&:to_s).join("\n\n") if @words&.any? + "OCR Words\n======#{ocr_words}\n\n:Content: #{@content}" + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/ocr/ocr_response.rb b/lib/mindee/v2/product/ocr/ocr_response.rb new file mode 100644 index 00000000..331d3b74 --- /dev/null +++ b/lib/mindee/v2/product/ocr/ocr_response.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +require_relative '../../parsing/base_response' +require_relative 'params/ocr_parameters' +require_relative 'ocr_inference' + +module Mindee + module V2 + module Product + module Ocr + # HTTP response wrapper that embeds a V2 Inference. + class OcrResponse < Mindee::V2::Parsing::BaseResponse + # @return [OcrInference] Parsed inference payload. + attr_reader :inference + + @_slug = 'ocr' + @_params_type = Ocr::Params::OcrParameters + + # @param server_response [Hash] Hash parsed from the API JSON response. 
+ def initialize(server_response) + super + + @inference = OcrInference.new(server_response['inference']) + end + + # String representation. + # @return [String] + def to_s + @inference.to_s + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/ocr/ocr_result.rb b/lib/mindee/v2/product/ocr/ocr_result.rb new file mode 100644 index 00000000..093a6ae9 --- /dev/null +++ b/lib/mindee/v2/product/ocr/ocr_result.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +require_relative 'ocr_page' + +module Mindee + module V2 + module Product + module Ocr + # Result of a ocr utility inference. + class OcrResult + # @return [Array] List of OCR results for each page in the document. + attr_reader :pages + + # @param server_response [Hash] Hash representation of the JSON returned by the service. + def initialize(server_response) + @pages = if server_response.key?('pages') + server_response['pages'].map do |pages| + OcrPage.new(pages) + end + end + end + + # String representation. + # @return [String] + def to_s + pages_str = @pages.join("\n") + + "Pages\n======\n#{pages_str}" + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/ocr/ocr_word.rb b/lib/mindee/v2/product/ocr/ocr_word.rb new file mode 100644 index 00000000..489c7dc8 --- /dev/null +++ b/lib/mindee/v2/product/ocr/ocr_word.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +module Mindee + module V2 + module Product + module Ocr + # OCR result for a single word extracted from the document page. + class OcrWord + # @return [String] Text content of the word. + attr_reader :content + # @return [Mindee::Geometry::Polygon] Position information as a list of points in clockwise order. + attr_reader :polygon + + # @param server_response [Hash] Hash representation of the JSON returned by the service. + def initialize(server_response) + @content = server_response['content'] + @polygon = Mindee::Geometry::Polygon.new(server_response['polygon']) + end + + # String representation. 
+ # @return [String] + def to_s + @content + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/ocr/params/ocr_parameters.rb b/lib/mindee/v2/product/ocr/params/ocr_parameters.rb new file mode 100644 index 00000000..4544a56d --- /dev/null +++ b/lib/mindee/v2/product/ocr/params/ocr_parameters.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +module Mindee + module V2 + module Product + module Ocr + module Params + class OcrParameters < Input::BaseParameters + # Parameters accepted by the ocr utility v2 endpoint. + class OcrParameters < Mindee::Input::BaseParameters + @_slug = 'ocr' + + # @param [String] model_id ID of the model + # @param [String, nil] file_alias File alias, if applicable. + # @param [Array, nil] webhook_ids List of webhook IDs to propagate the API response to. + # @param [Hash, nil] polling_options Options for polling. Set only if having timeout issues. + # @param [Boolean, nil] close_file Whether to close the file after parsing. + def initialize( + model_id, + file_alias: nil, + webhook_ids: nil, + polling_options: nil, + close_file: true + ) + super + end + + # Loads the parameters from a Hash. + # @param [Hash] params Parameters to provide as a hash. 
+ # @return [OcrParameters] + def self.from_hash(params: {}) + OcrParameters.new( + params.fetch(:model_id), + file_alias: params.fetch(:file_alias, nil), + webhook_ids: params.fetch(:webhook_ids, nil), + close_file: params.fetch(:close_file, true) + ) + end + end + end + end + end + end + end +end diff --git a/sig/mindee/v2/product/ocr/ocr_inference.rbs b/sig/mindee/v2/product/ocr/ocr_inference.rbs new file mode 100644 index 00000000..029aa6fb --- /dev/null +++ b/sig/mindee/v2/product/ocr/ocr_inference.rbs @@ -0,0 +1,14 @@ +module Mindee + module V2 + module Product + module Ocr + class OcrInference + attr_reader result: OcrResult + + def initialize: (Hash[String | Symbol, untyped]) -> void + def to_s: -> String + end + end + end + end +end diff --git a/sig/mindee/v2/product/ocr/ocr_page.rbs b/sig/mindee/v2/product/ocr/ocr_page.rbs new file mode 100644 index 00000000..11cd9601 --- /dev/null +++ b/sig/mindee/v2/product/ocr/ocr_page.rbs @@ -0,0 +1,15 @@ +module Mindee + module V2 + module Product + module Ocr + class OcrPage + attr_reader words: Array[OcrWord] + attr_reader content: String + + def initialize: (Hash[String | Symbol, untyped]) -> void + def to_s: -> String + end + end + end + end +end diff --git a/sig/mindee/v2/product/ocr/ocr_response.rbs b/sig/mindee/v2/product/ocr/ocr_response.rbs new file mode 100644 index 00000000..4841dca2 --- /dev/null +++ b/sig/mindee/v2/product/ocr/ocr_response.rbs @@ -0,0 +1,23 @@ +# lib/mindee/v2/product/ocr/ocr_response.rb + +module Mindee + module V2 + module Product + module Ocr + class OcrResponse + self.@_slug: String + self.@_params_type: singleton(Params::OcrParameters) + + attr_reader inference: Mindee::V2::Product::Ocr::OcrInference + def initialize: (Hash[String | Symbol, untyped]) -> void + + def _params_type: -> singleton(Params::OcrParameters) + + def to_s: -> String + def self._params_type: () -> singleton(Params::OcrParameters) + def self._slug: () -> String + end + end + end + end +end diff --git 
a/sig/mindee/v2/product/ocr/ocr_result.rbs b/sig/mindee/v2/product/ocr/ocr_result.rbs new file mode 100644 index 00000000..c35b0d73 --- /dev/null +++ b/sig/mindee/v2/product/ocr/ocr_result.rbs @@ -0,0 +1,14 @@ +module Mindee + module V2 + module Product + module Ocr + class OcrResult + attr_reader pages: Array[Mindee::V2::Product::Ocr::OcrPage] + + def initialize: (Hash[String | Symbol, untyped]) -> void + def to_s: -> String + end + end + end + end +end diff --git a/sig/mindee/v2/product/ocr/ocr_word.rbs b/sig/mindee/v2/product/ocr/ocr_word.rbs new file mode 100644 index 00000000..2d9967fb --- /dev/null +++ b/sig/mindee/v2/product/ocr/ocr_word.rbs @@ -0,0 +1,15 @@ +module Mindee + module V2 + module Product + module Ocr + class OcrWord + attr_reader content: String + attr_reader polygon: Geometry::Polygon + + def initialize: (Hash[String | Symbol, untyped]) -> void + def to_s: -> String + end + end + end + end +end diff --git a/sig/mindee/v2/product/ocr/params/ocr_parameters/ocr_parameters.rbs b/sig/mindee/v2/product/ocr/params/ocr_parameters/ocr_parameters.rbs new file mode 100644 index 00000000..18732072 --- /dev/null +++ b/sig/mindee/v2/product/ocr/params/ocr_parameters/ocr_parameters.rbs @@ -0,0 +1,25 @@ +module Mindee + module V2 + module Product + module Ocr + module Params + module OcrParameters + class OcrParameters + self.@_slug: String + + def self.from_hash: (params: Hash[String | Symbol, untyped]) -> OcrParameters + + def initialize: ( + String, + ?file_alias: String?, + ?webhook_ids: Array[String]?, + ?polling_options: Hash[Symbol | String, untyped] | Input::PollingOptions?, + ?close_file: bool? 
+ ) -> void + end + end + end + end + end + end +end diff --git a/spec/v2/client_v2_spec.rb b/spec/v2/client_v2_spec.rb index dc1b3714..57c3c2fa 100644 --- a/spec/v2/client_v2_spec.rb +++ b/spec/v2/client_v2_spec.rb @@ -4,7 +4,7 @@ require 'mindee' require_relative '../http/mock_http_response' # <- the original helper -RSpec.describe Mindee::ClientV2 do +describe Mindee::ClientV2 do let(:input_doc) { Mindee::Input::Source::PathInputSource.new(File.join(FILE_TYPES_DIR, 'pdf', 'blank.pdf')) } let(:base_url) { 'https://dummy-url' } let(:api_key) { 'dummy-api-key' } diff --git a/spec/v2/parsing/inference_spec.rb b/spec/v2/parsing/inference_spec.rb index f5beee4a..31f2c2b0 100644 --- a/spec/v2/parsing/inference_spec.rb +++ b/spec/v2/parsing/inference_spec.rb @@ -2,7 +2,7 @@ require 'mindee' -RSpec.describe 'inference' do +describe 'inference' do let(:findoc_path) { File.join(V2_PRODUCT_DATA_DIR, 'extraction', 'financial_document') } let(:extraction_path) { File.join(V2_PRODUCT_DATA_DIR, 'extraction') } let(:deep_nested_field_path) { File.join(extraction_path, 'deep_nested_fields.json') } diff --git a/spec/v2/parsing/job_webhook_spec.rb b/spec/v2/parsing/job_webhook_spec.rb index bf046760..b07e6263 100644 --- a/spec/v2/parsing/job_webhook_spec.rb +++ b/spec/v2/parsing/job_webhook_spec.rb @@ -2,7 +2,7 @@ require 'mindee' -RSpec.describe Mindee::Parsing::V2::JobWebhook do +describe Mindee::Parsing::V2::JobWebhook do describe '#initialize' do context 'when error key is present but value is nil' do it 'does not raise an error and sets @error to nil' do diff --git a/spec/v2/product/classification/classification_integration.rb b/spec/v2/product/classification/classification_integration.rb index affff68d..86df9890 100644 --- a/spec/v2/product/classification/classification_integration.rb +++ b/spec/v2/product/classification/classification_integration.rb @@ -2,7 +2,7 @@ require 'mindee' -RSpec.describe Mindee::ClientV2, :integration, :v2 do +describe Mindee::ClientV2, 
:integration, :v2 do let(:classification_model_id) do ENV.fetch('MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID', nil) end diff --git a/spec/v2/product/classification/classification_spec.rb b/spec/v2/product/classification/classification_spec.rb index 3469a1eb..12dd00ab 100644 --- a/spec/v2/product/classification/classification_spec.rb +++ b/spec/v2/product/classification/classification_spec.rb @@ -3,7 +3,7 @@ require 'json' require 'mindee/v2/product' -RSpec.describe Mindee::V2::Product::Classification::ClassificationResponse, :v2 do +describe Mindee::V2::Product::Classification::ClassificationResponse, :v2 do let(:classification_data_dir) { File.join(V2_PRODUCT_DATA_DIR, 'classification') } it 'parses a single classification properly' do @@ -14,7 +14,9 @@ expect(response.inference).to be_a(Mindee::V2::Product::Classification::ClassificationInference) expect(response.inference.result).to be_a(Mindee::V2::Product::Classification::ClassificationResult) - expect(response.inference.result.classification).to be_a(Mindee::V2::Product::Classification::ClassificationClassifier) + expect( + response.inference.result.classification + ).to be_a(Mindee::V2::Product::Classification::ClassificationClassifier) expect(response.inference.result.classification.document_type).to eq('invoice') end diff --git a/spec/v2/product/ocr/ocr_integration.rb b/spec/v2/product/ocr/ocr_integration.rb new file mode 100644 index 00000000..58958829 --- /dev/null +++ b/spec/v2/product/ocr/ocr_integration.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +require 'mindee' + +describe Mindee::ClientV2, :integration, :v2 do + let(:ocr_model_id) do + ENV.fetch('MINDEE_V2_SE_TESTS_OCR_MODEL_ID') + end + + let(:v2_client) do + Mindee::ClientV2.new + end + + it 'processes ocr default sample correctly' do + input_source = Mindee::Input::Source::PathInputSource.new( + File.join(V2_PRODUCT_DATA_DIR, 'ocr', 'default_sample.jpg') + ) + + params = Mindee::Input::InferenceParameters.new(ocr_model_id) + + response 
= v2_client.enqueue_and_get_result( + Mindee::V2::Product::Ocr::OcrResponse, + input_source, + params + ) + + expect(response.inference).not_to be_nil + expect(response.inference.file.name).to eq('default_sample.jpg') + expect(response.inference).to be_a(Mindee::V2::Product::Ocr::OcrInference) + expect(response.inference.result).to be_a(Mindee::V2::Product::Ocr::OcrResult) + expect(response.inference.result.pages.size).to eq(1) + expect(response.inference.result.pages[0].words.size).to be > 5 + end +end diff --git a/spec/v2/product/ocr/ocr_spec.rb b/spec/v2/product/ocr/ocr_spec.rb new file mode 100644 index 00000000..8f31560d --- /dev/null +++ b/spec/v2/product/ocr/ocr_spec.rb @@ -0,0 +1,96 @@ +# frozen_string_literal: true + +require 'json' +require 'mindee' + +describe Mindee::V2::Product::Ocr::OcrResponse, :v2 do + let(:ocr_data_dir) { File.join(V2_PRODUCT_DATA_DIR, 'ocr') } + + it 'parses a single page OCR response properly' do + json_path = File.join(ocr_data_dir, 'ocr_single.json') + json_sample = JSON.parse(File.read(json_path)) + + response = Mindee::V2::Product::Ocr::OcrResponse.new(json_sample) + + expect(response.inference).to be_a(Mindee::V2::Product::Ocr::OcrInference) + expect(response.inference.result.pages).not_to be_empty + expect(response.inference.result.pages.size).to eq(1) + + page = response.inference.result.pages[0] + first_word = page.words[0] + + expect(first_word.content).to eq('Shipper:') + expect(first_word.polygon[0][0]).to eq(0.09742441209406495) + expect(first_word.polygon[0][1]).to eq(0.07007125890736342) + expect(first_word.polygon[1][0]).to eq(0.15621500559910415) + expect(first_word.polygon[1][1]).to eq(0.07046714172604909) + expect(first_word.polygon[2][0]).to eq(0.15621500559910415) + expect(first_word.polygon[2][1]).to eq(0.08155186064924783) + expect(first_word.polygon[3][0]).to eq(0.09742441209406495) + expect(first_word.polygon[3][1]).to eq(0.08155186064924783) + + expect(page.words.size).to eq(305) + expect(page.content).to 
eq( + 'Shipper: GLOBAL FREIGHT SOLUTIONS INC. 123 OCEAN DRIVE SHANGHAI, CHINA TEL: ' \ + "86-21-12345678 FAX: 86-21-87654321\nConsignee: PACIFIC TRADING CO. 789 TRADE " \ + "STREET SINGAPORE 567890 SINGAPORE TEL: 65-65432100 FAX: 65-65432101\nNotify " \ + "Party (Complete name and address): SAME AS CONSIGNEE\nBILL OF LADING\nJob No " \ + ".: XYZ123456\nGLOBAL SHIPPING CO\nPlace of receipt:\nSHANGHAI, CHINA\nOcean " \ + "vessel:\nGLOBAL VOYAGER V-202\nPort of loading:\nSHANGHAI, CHINA\nPort of " \ + "discharge:\nLOS ANGELES, USA\nPlace of delivery:\nLOS ANGELES, USA\nMarks and " \ + "numbers:\nP+F\n(IN DIA.)\nP/N: 12345\nDRAWING NO. A1B2C3\nNumber and kinds of " \ + "packages: 1CTN ELECTRONIC COMPONENTS 50 PCS\nDescription of goods:\nGross " \ + "weight:\n500 KGS\nMeasurement:\n1.5 M3\nP/O: 987654 LOT NO. " \ + "112233\nFFAU1234567/40'HQ/CFS-CFS ICTN/500KGS/1.5M3 SEAL NO:ABC1234567\nMADE " \ + "IN CHINA\nSAY TOTAL:\n2 PLTS ONLY\n\"FREIGHT COLLECT\" CFS-CFS\n** SURRENDERED " \ + "**\nFreight and Charge\nOCEAN FREIGHT\nRevenue tons\nRate\nPrepaid\nCollect\n" \ + "AS ARRANGED\nThe goods and instructions are accepted and dealt with subject " \ + 'to the Standard Conditions printed overleaf. Taken in charge in apparent good ' \ + 'order and condition, unless otherwise noted herein, at the place of receipt ' \ + 'for transport and delivery as mentioned above. One of these Combined ' \ + 'Transport Bills of Lading must be surrendered duly endorsed in exchange for ' \ + 'the goods. In Witness whereof the original Combined Transport Bills of Lading ' \ + 'all of this tenor and date have been signed in the number stated below, one ' \ + "of which being accomplished the other(s) to be void.\nUSD: 31.57 SHIPPED ON " \ + "BOARD: 30. SEP. 2022\nFreight Amount OCEAN FREIGHT\nFreight payable at\n" \ + "DESTINATION\nNumber of original\nZERO (0)\nCargo insurance\nnot covered\n" \ + "Covered according to attached Policy\nPlace and date of issue\nTAIPEI, " \ + "TAIWAN: 30. SEP. 
2022\nFor delivery of goods please apply to: INTERNATIONAL " \ + 'LOGISTICS LTD 456 SHIPPING LANE LOS ANGELES, CA 90001 USA TEL:1-213-9876543 ' \ + "FAX:1-213-9876544 ATTN: MR. JOHN DOE\nSignature: GLOBAL SHIPPING CO., " \ + "LTD.\nBY\nAS CARRIER" + ) + end + + it 'parses a multiple page OCR response properly' do + json_path = File.join(ocr_data_dir, 'ocr_multiple.json') + json_sample = JSON.parse(File.read(json_path)) + + response = Mindee::V2::Product::Ocr::OcrResponse.new(json_sample) + + expect(response.inference).to be_a(Mindee::V2::Product::Ocr::OcrInference) + expect(response.inference.result).to be_a(Mindee::V2::Product::Ocr::OcrResult) + expect(response.inference.result.pages[0]).to be_a(Mindee::V2::Product::Ocr::OcrPage) + expect(response.inference.result.pages.size).to eq(3) + + page_zero_words = response.inference.result.pages[0].words + expect(page_zero_words.size).to eq(295) + expect(page_zero_words[0].content).to eq('FICTIOCORP') + expect(page_zero_words[0].polygon[0][0]).to eq(0.06649402824332337) + expect(page_zero_words[0].polygon[0][1]).to eq(0.03957449719523875) + expect(page_zero_words[0].polygon[1][0]).to eq(0.23219061218068954) + expect(page_zero_words[0].polygon[1][1]).to eq(0.03960015049938432) + expect(page_zero_words[0].polygon[2][0]).to eq(0.23219061218068954) + expect(page_zero_words[0].polygon[2][1]).to eq(0.06770762074155151) + expect(page_zero_words[0].polygon[3][0]).to eq(0.06649402824332337) + expect(page_zero_words[0].polygon[3][1]).to eq(0.06770762074155151) + + page_one_words = response.inference.result.pages[1].words + expect(page_one_words.size).to eq(450) + expect(page_one_words[0].content).to eq('KEOLIO') + + page_two_words = response.inference.result.pages[2].words + expect(page_two_words.size).to eq(355) + expect(page_two_words[0].content).to eq('KEOLIO') + end +end From 48974e011de7e0a604ecb3fc4d39189f77dd4746 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Wed, 25 
Feb 2026 17:48:33 +0100 Subject: [PATCH 10/23] :sparkles: add support for split --- lib/mindee/v2/product.rb | 2 + .../product/split/params/split_parameters.rb | 45 ++++++++++++++++ .../v2/product/split/split_inference.rb | 34 ++++++++++++ lib/mindee/v2/product/split/split_range.rb | 30 +++++++++++ lib/mindee/v2/product/split/split_response.rb | 35 +++++++++++++ lib/mindee/v2/product/split/split_result.rb | 34 ++++++++++++ .../split_parameters/split_parameters.rbs | 25 +++++++++ .../v2/product/split/split_inference.rbs | 14 +++++ sig/mindee/v2/product/split/split_range.rbs | 15 ++++++ .../v2/product/split/split_response.rbs | 23 ++++++++ sig/mindee/v2/product/split/split_result.rbs | 14 +++++ .../classification_integration.rb | 3 +- spec/v2/product/crop/crop_integration.rb | 4 +- spec/v2/product/ocr/ocr_integration.rb | 3 +- spec/v2/product/split/split_integration.rb | 33 ++++++++++++ spec/v2/product/split/split_spec.rb | 52 +++++++++++++++++++ 16 files changed, 361 insertions(+), 5 deletions(-) create mode 100644 lib/mindee/v2/product/split/params/split_parameters.rb create mode 100644 lib/mindee/v2/product/split/split_inference.rb create mode 100644 lib/mindee/v2/product/split/split_range.rb create mode 100644 lib/mindee/v2/product/split/split_response.rb create mode 100644 lib/mindee/v2/product/split/split_result.rb create mode 100644 sig/mindee/v2/product/split/params/split_parameters/split_parameters.rbs create mode 100644 sig/mindee/v2/product/split/split_inference.rbs create mode 100644 sig/mindee/v2/product/split/split_range.rbs create mode 100644 sig/mindee/v2/product/split/split_response.rbs create mode 100644 sig/mindee/v2/product/split/split_result.rbs create mode 100644 spec/v2/product/split/split_integration.rb create mode 100644 spec/v2/product/split/split_spec.rb diff --git a/lib/mindee/v2/product.rb b/lib/mindee/v2/product.rb index 5a25ab4b..c34a2224 100644 --- a/lib/mindee/v2/product.rb +++ b/lib/mindee/v2/product.rb @@ -6,3 +6,5 @@ 
require_relative 'product/crop/params/crop_parameters' require_relative 'product/ocr/ocr_response' require_relative 'product/ocr/params/ocr_parameters' +require_relative 'product/split/split_response' +require_relative 'product/split/params/split_parameters' diff --git a/lib/mindee/v2/product/split/params/split_parameters.rb b/lib/mindee/v2/product/split/params/split_parameters.rb new file mode 100644 index 00000000..a82a1581 --- /dev/null +++ b/lib/mindee/v2/product/split/params/split_parameters.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +module Mindee + module V2 + module Product + module Split + module Params + class SplitParameters < Input::BaseParameters + # Parameters accepted by the split utility v2 endpoint. + class SplitParameters < Mindee::Input::BaseParameters + @_slug = 'split' + + # @param [String] model_id ID of the model + # @param [String, nil] file_alias File alias, if applicable. + # @param [Array, nil] webhook_ids List of webhook IDs to propagate the API response to. + # @param [Hash, nil] polling_options Options for polling. Set only if having timeout issues. + # @param [Boolean, nil] close_file Whether to close the file after parsing. + def initialize( + model_id, + file_alias: nil, + webhook_ids: nil, + polling_options: nil, + close_file: true + ) + super + end + + # Loads the parameters from a Hash. + # @param [Hash] params Parameters to provide as a hash. 
+ # @return [SplitParameters] + def self.from_hash(params: {}) + SplitParameters.new( + params.fetch(:model_id), + file_alias: params.fetch(:file_alias, nil), + webhook_ids: params.fetch(:webhook_ids, nil), + close_file: params.fetch(:close_file, true) + ) + end + end + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/split/split_inference.rb b/lib/mindee/v2/product/split/split_inference.rb new file mode 100644 index 00000000..4401c9b9 --- /dev/null +++ b/lib/mindee/v2/product/split/split_inference.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +require_relative 'split_result' + +module Mindee + module V2 + module Product + module Split + # Split inference result. + class SplitInference < Mindee::V2::Parsing::BaseInference + # @return [SplitResult] Result of a split inference. + attr_reader :result + + # @param server_response [Hash] Hash representation of the JSON returned by the service. + def initialize(server_response) + super + + @result = SplitResult.new(server_response['result']) + end + + # String representation. + # @return [String] + def to_s + [ + super, + @result.to_s, + '', + ].join("\n") + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/split/split_range.rb b/lib/mindee/v2/product/split/split_range.rb new file mode 100644 index 00000000..6eaabd12 --- /dev/null +++ b/lib/mindee/v2/product/split/split_range.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +module Mindee + module V2 + module Product + module Split + # Split inference result. + class SplitRange + # @return [Array] 0-based page indexes, where the first integer indicates the start page and the + # second integer indicates the end page. + attr_reader :page_range + # @return [String] The document type, as identified on given classification values. + attr_reader :document_type + + # @param server_response [Hash] Hash representation of the JSON returned by the service. 
+ def initialize(server_response) + @page_range = server_response['page_range'] + @document_type = server_response['document_type'] + end + + # String representation. + # @return [String] + def to_s + "* :Page Range: #{@page_range}\n :Document Type: #{@document_type}" + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/split/split_response.rb b/lib/mindee/v2/product/split/split_response.rb new file mode 100644 index 00000000..54190d5d --- /dev/null +++ b/lib/mindee/v2/product/split/split_response.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +require_relative '../../parsing/base_response' +require_relative 'params/split_parameters' +require_relative 'split_inference' + +module Mindee + module V2 + module Product + module Split + # HTTP response wrapper that embeds a V2 Inference. + class SplitResponse < Mindee::V2::Parsing::BaseResponse + # @return [SplitInference] Parsed inference payload. + attr_reader :inference + + @_slug = 'split' + @_params_type = Split::Params::SplitParameters + + # @param server_response [Hash] Hash parsed from the API JSON response. + def initialize(server_response) + super + + @inference = SplitInference.new(server_response['inference']) + end + + # String representation. + # @return [String] + def to_s + @inference.to_s + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/split/split_result.rb b/lib/mindee/v2/product/split/split_result.rb new file mode 100644 index 00000000..b11d284b --- /dev/null +++ b/lib/mindee/v2/product/split/split_result.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +require_relative 'split_range' + +module Mindee + module V2 + module Product + module Split + # Result of a split utility inference. + class SplitResult + # @return [Array] List of results of splitped document regions. + attr_reader :splits + + # @param server_response [Hash] Hash representation of the JSON returned by the service. 
+ def initialize(server_response) + @splits = if server_response.key?('splits') + server_response['splits'].map do |split| + SplitRange.new(split) + end + end + end + + # String representation. + # @return [String] + def to_s + splits_str = @splits.join("\n") + + "Splits\n======\n#{splits_str}" + end + end + end + end + end +end diff --git a/sig/mindee/v2/product/split/params/split_parameters/split_parameters.rbs b/sig/mindee/v2/product/split/params/split_parameters/split_parameters.rbs new file mode 100644 index 00000000..57f97862 --- /dev/null +++ b/sig/mindee/v2/product/split/params/split_parameters/split_parameters.rbs @@ -0,0 +1,25 @@ +module Mindee + module V2 + module Product + module Split + module Params + module SplitParameters + class SplitParameters + self.@_slug: String + + def self.from_hash: (params: Hash[String | Symbol, untyped]) -> SplitParameters + + def initialize: ( + String, + ?file_alias: String?, + ?webhook_ids: Array[String]?, + ?polling_options: Hash[Symbol | String, untyped] | Input::PollingOptions?, + ?close_file: bool? 
+ ) -> void + end + end + end + end + end + end +end diff --git a/sig/mindee/v2/product/split/split_inference.rbs b/sig/mindee/v2/product/split/split_inference.rbs new file mode 100644 index 00000000..2cd0c31e --- /dev/null +++ b/sig/mindee/v2/product/split/split_inference.rbs @@ -0,0 +1,14 @@ +module Mindee + module V2 + module Product + module Split + class SplitInference + attr_reader result: SplitResult + + def initialize: (Hash[String | Symbol, untyped]) -> void + def to_s: -> String + end + end + end + end +end diff --git a/sig/mindee/v2/product/split/split_range.rbs b/sig/mindee/v2/product/split/split_range.rbs new file mode 100644 index 00000000..14a390ef --- /dev/null +++ b/sig/mindee/v2/product/split/split_range.rbs @@ -0,0 +1,15 @@ +module Mindee + module V2 + module Product + module Split + class SplitRange + attr_reader page_range: Array[int] + attr_reader document_type: String + + def initialize: (Hash[String | Symbol, untyped]) -> void + def to_s: -> String + end + end + end + end +end diff --git a/sig/mindee/v2/product/split/split_response.rbs b/sig/mindee/v2/product/split/split_response.rbs new file mode 100644 index 00000000..fd6ea7a3 --- /dev/null +++ b/sig/mindee/v2/product/split/split_response.rbs @@ -0,0 +1,23 @@ +# lib/mindee/v2/product/split/split_response.rb + +module Mindee + module V2 + module Product + module Split + class SplitResponse + self.@_slug: String + self.@_params_type: singleton(Params::SplitParameters) + + attr_reader inference: Mindee::V2::Product::Split::SplitInference + def initialize: (Hash[String | Symbol, untyped]) -> void + + def _params_type: -> singleton(Params::SplitParameters) + + def to_s: -> String + def self._params_type: () -> singleton(Params::SplitParameters) + def self._slug: () -> String + end + end + end + end +end diff --git a/sig/mindee/v2/product/split/split_result.rbs b/sig/mindee/v2/product/split/split_result.rbs new file mode 100644 index 00000000..e1be3f6a --- /dev/null +++ 
b/sig/mindee/v2/product/split/split_result.rbs @@ -0,0 +1,14 @@ +module Mindee + module V2 + module Product + module Split + class SplitResult + attr_reader splits: Array[SplitRange] + + def initialize: (Hash[String | Symbol, untyped]) -> void + def to_s: -> String + end + end + end + end +end diff --git a/spec/v2/product/classification/classification_integration.rb b/spec/v2/product/classification/classification_integration.rb index 86df9890..1518f7f4 100644 --- a/spec/v2/product/classification/classification_integration.rb +++ b/spec/v2/product/classification/classification_integration.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require 'mindee' +require 'mindee/v2/product' describe Mindee::ClientV2, :integration, :v2 do let(:classification_model_id) do @@ -16,7 +17,7 @@ File.join(V2_PRODUCT_DATA_DIR, 'classification', 'default_invoice.jpg') ) - params = Mindee::Input::InferenceParameters.new(classification_model_id) + params = ClassificationParameters.new(classification_model_id) response = v2_client.enqueue_and_get_result( Mindee::V2::Product::Classification::ClassificationResponse, diff --git a/spec/v2/product/crop/crop_integration.rb b/spec/v2/product/crop/crop_integration.rb index cf386004..dd92d20c 100644 --- a/spec/v2/product/crop/crop_integration.rb +++ b/spec/v2/product/crop/crop_integration.rb @@ -3,8 +3,6 @@ require 'mindee' require 'mindee/v2/product' -require_relative '../../../data' - describe Mindee::ClientV2, :integration, :v2 do let(:crop_model_id) do ENV.fetch('MINDEE_V2_SE_TESTS_CROP_MODEL_ID', nil) @@ -19,7 +17,7 @@ File.join(V2_PRODUCT_DATA_DIR, 'crop', 'default_sample.jpg') ) - params = Mindee::Input::InferenceParameters.new(crop_model_id) + params = CropParameters.new(crop_model_id) response = v2_client.enqueue_and_get_result( Mindee::V2::Product::Crop::CropResponse, diff --git a/spec/v2/product/ocr/ocr_integration.rb b/spec/v2/product/ocr/ocr_integration.rb index 58958829..812b3781 100644 --- a/spec/v2/product/ocr/ocr_integration.rb 
+++ b/spec/v2/product/ocr/ocr_integration.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require 'mindee' +require 'mindee/v2/product' describe Mindee::ClientV2, :integration, :v2 do let(:ocr_model_id) do @@ -16,7 +17,7 @@ File.join(V2_PRODUCT_DATA_DIR, 'ocr', 'default_sample.jpg') ) - params = Mindee::Input::InferenceParameters.new(ocr_model_id) + params = OcrParameters.new(ocr_model_id) response = v2_client.enqueue_and_get_result( Mindee::V2::Product::Ocr::OcrResponse, diff --git a/spec/v2/product/split/split_integration.rb b/spec/v2/product/split/split_integration.rb new file mode 100644 index 00000000..0e5114c3 --- /dev/null +++ b/spec/v2/product/split/split_integration.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +require 'mindee' +require 'mindee/v2/product' + +describe Mindee::ClientV2, :integration, :v2 do + let(:split_model_id) do + ENV.fetch('MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID') + end + + let(:v2_client) do + Mindee::ClientV2.new + end + + it 'processes split default sample correctly' do + input_source = Mindee::Input::Source::PathInputSource.new( + File.join(V2_PRODUCT_DATA_DIR, 'split', 'default_sample.pdf') + ) + + params = SplitParameters.new(split_model_id) + + response = v2_client.enqueue_and_get_result( + Mindee::V2::Product::Split::SplitResponse, + input_source, + params + ) + + expect(response.inference).not_to be_nil + expect(response.inference.file.name).to eq('default_sample.pdf') + expect(response.inference.result.splits).not_to be_empty + expect(response.inference.result.splits.size).to eq(2) + end +end diff --git a/spec/v2/product/split/split_spec.rb b/spec/v2/product/split/split_spec.rb new file mode 100644 index 00000000..7a0285bc --- /dev/null +++ b/spec/v2/product/split/split_spec.rb @@ -0,0 +1,52 @@ +# frozen_string_literal: true + +require 'json' +require 'mindee' + +describe Mindee::V2::Product::Split::SplitResponse, :v2 do + let(:split_data_dir) { File.join(V2_PRODUCT_DATA_DIR, 'split') } + + it 'parses a single split 
properly' do + json_path = File.join(split_data_dir, 'split_single.json') + json_sample = JSON.parse(File.read(json_path)) + + response = Mindee::V2::Product::Split::SplitResponse.new(json_sample) + + expect(response.inference).to be_a(Mindee::V2::Product::Split::SplitInference) + expect(response.inference.result.splits).not_to be_empty + expect(response.inference.result.splits[0].page_range.size).to eq(2) + expect(response.inference.result.splits[0].page_range[0]).to eq(0) + expect(response.inference.result.splits[0].page_range[1]).to eq(0) + expect(response.inference.result.splits[0].document_type).to eq('receipt') + end + + it 'parses multiple splits properly' do + json_path = File.join(split_data_dir, 'split_multiple.json') + json_sample = JSON.parse(File.read(json_path)) + + response = Mindee::V2::Product::Split::SplitResponse.new(json_sample) + + expect(response.inference).to be_a(Mindee::V2::Product::Split::SplitInference) + expect(response.inference.result).to be_a(Mindee::V2::Product::Split::SplitResult) + expect(response.inference.result.splits[0]).to be_a(Mindee::V2::Product::Split::SplitRange) + expect(response.inference.result.splits.size).to eq(3) + + split_zero = response.inference.result.splits[0] + expect(split_zero.page_range.size).to eq(2) + expect(split_zero.page_range[0]).to eq(0) + expect(split_zero.page_range[1]).to eq(0) + expect(split_zero.document_type).to eq('invoice') + + split_one = response.inference.result.splits[1] + expect(split_one.page_range.size).to eq(2) + expect(split_one.page_range[0]).to eq(1) + expect(split_one.page_range[1]).to eq(3) + expect(split_one.document_type).to eq('invoice') + + split_two = response.inference.result.splits[2] + expect(split_two.page_range.size).to eq(2) + expect(split_two.page_range[0]).to eq(4) + expect(split_two.page_range[1]).to eq(4) + expect(split_two.document_type).to eq('invoice') + end +end From 8452af4b35adb2ef3b3f0d169a71d6f7c4a739bd Mon Sep 17 00:00:00 2001 From: sebastianMindee 
<130448732+sebastianMindee@users.noreply.github.com> Date: Wed, 25 Feb 2026 18:00:37 +0100 Subject: [PATCH 11/23] fix imports? --- spec/v2/product/classification/classification_integration.rb | 2 +- spec/v2/product/crop/crop_integration.rb | 2 +- spec/v2/product/ocr/ocr_integration.rb | 2 +- spec/v2/product/split/split_integration.rb | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/spec/v2/product/classification/classification_integration.rb b/spec/v2/product/classification/classification_integration.rb index 1518f7f4..c44addd4 100644 --- a/spec/v2/product/classification/classification_integration.rb +++ b/spec/v2/product/classification/classification_integration.rb @@ -17,7 +17,7 @@ File.join(V2_PRODUCT_DATA_DIR, 'classification', 'default_invoice.jpg') ) - params = ClassificationParameters.new(classification_model_id) + params = Mindee::V2::Product::Classification::Params::ClassificationParameters.new(classification_model_id) response = v2_client.enqueue_and_get_result( Mindee::V2::Product::Classification::ClassificationResponse, diff --git a/spec/v2/product/crop/crop_integration.rb b/spec/v2/product/crop/crop_integration.rb index dd92d20c..537e17a7 100644 --- a/spec/v2/product/crop/crop_integration.rb +++ b/spec/v2/product/crop/crop_integration.rb @@ -17,7 +17,7 @@ File.join(V2_PRODUCT_DATA_DIR, 'crop', 'default_sample.jpg') ) - params = CropParameters.new(crop_model_id) + params = Mindee::V2::Product::Crop::Params::CropParameters.new(crop_model_id) response = v2_client.enqueue_and_get_result( Mindee::V2::Product::Crop::CropResponse, diff --git a/spec/v2/product/ocr/ocr_integration.rb b/spec/v2/product/ocr/ocr_integration.rb index 812b3781..71ff595f 100644 --- a/spec/v2/product/ocr/ocr_integration.rb +++ b/spec/v2/product/ocr/ocr_integration.rb @@ -17,7 +17,7 @@ File.join(V2_PRODUCT_DATA_DIR, 'ocr', 'default_sample.jpg') ) - params = OcrParameters.new(ocr_model_id) + params = Mindee::V2::Product::Ocr::Params::OcrParameters.new(ocr_model_id) 
response = v2_client.enqueue_and_get_result( Mindee::V2::Product::Ocr::OcrResponse, diff --git a/spec/v2/product/split/split_integration.rb b/spec/v2/product/split/split_integration.rb index 0e5114c3..5729213c 100644 --- a/spec/v2/product/split/split_integration.rb +++ b/spec/v2/product/split/split_integration.rb @@ -17,7 +17,7 @@ File.join(V2_PRODUCT_DATA_DIR, 'split', 'default_sample.pdf') ) - params = SplitParameters.new(split_model_id) + params = Mindee::V2::Product::Split::Params::SplitParameters.new(split_model_id) response = v2_client.enqueue_and_get_result( Mindee::V2::Product::Split::SplitResponse, From 0f23653f2d21eb520156b56e306eb9597d4aac79 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Wed, 25 Feb 2026 18:25:35 +0100 Subject: [PATCH 12/23] fix api calls --- lib/mindee/client_v2.rb | 12 +++- lib/mindee/http/mindee_api_v2.rb | 6 +- lib/mindee/input/base_parameters.rb | 10 ++-- .../v2/product/crop/params/crop_parameters.rb | 59 ++++++++++--------- sig/mindee/client_v2.rbs | 2 +- .../crop_parameters/crop_parameters.rbs | 22 ++++--- 6 files changed, 59 insertions(+), 52 deletions(-) diff --git a/lib/mindee/client_v2.rb b/lib/mindee/client_v2.rb index 01dbdf3b..8626c707 100644 --- a/lib/mindee/client_v2.rb +++ b/lib/mindee/client_v2.rb @@ -34,6 +34,14 @@ def get_result(response_class, inference_id) @mindee_api.req_get_result(response_class, inference_id) end + # Retrieves a result from a given URL. + # @param url [String] + # @param response_class [Class] + # @return [Mindee::Parsing::V2::BaseResponse] + def get_result_url(response_class, url) + @mindee_api.req_get_result_url(response_class, url) + end + # Retrieves an inference. 
# @param job_id [String] # @return [Mindee::Parsing::V2::JobResponse] @@ -96,8 +104,8 @@ def enqueue_and_get_result( while retry_counter < normalized_params.polling_options.max_retries if poll_results.job.status == 'Failed' break - elsif poll_results.job.status == 'Processed' - return get_result(response_type, poll_results.job.id) + elsif !poll_results.job.result_url.nil? + return get_result_url(response_type, poll_results.job.result_url) end logger.debug( diff --git a/lib/mindee/http/mindee_api_v2.rb b/lib/mindee/http/mindee_api_v2.rb index 698018d8..21e487c7 100644 --- a/lib/mindee/http/mindee_api_v2.rb +++ b/lib/mindee/http/mindee_api_v2.rb @@ -46,8 +46,9 @@ def req_get_inference(inference_id) # @return [Mindee::Parsing::V2::BaseResponse] def req_get_result(response_class, inference_id) @settings.check_api_key - response = inference_result_req_get( - inference_id + response = result_req_get( + inference_id, + response_class ) response_class.new(process_response(response)) end @@ -172,7 +173,6 @@ def enqueue_form_options(form_data, params) # @return [Net::HTTPResponse, nil] def enqueue(input_source, params) uri = URI("#{@settings.base_url}/products/#{params._slug}/enqueue") - puts "POST #{uri} from #{params.class}" form_data = if input_source.is_a?(Mindee::Input::Source::URLInputSource) [['url', input_source.url]] # : Array[untyped] diff --git a/lib/mindee/input/base_parameters.rb b/lib/mindee/input/base_parameters.rb index d28c1e30..d93c0723 100644 --- a/lib/mindee/input/base_parameters.rb +++ b/lib/mindee/input/base_parameters.rb @@ -49,6 +49,11 @@ def self._slug '' end + # @return [String] Slug for the endpoint. + def _slug + self.class._slug + end + def self.from_hash(params: {}) load_from_hash(params: params) new( @@ -92,11 +97,6 @@ def append_form_data(form_data) form_data end - # @return [String] Slug for the endpoint. 
- def _slug - self.class._slug - end - # Validates the parameters for async auto-polling def validate_async_params min_delay_sec = 1 diff --git a/lib/mindee/v2/product/crop/params/crop_parameters.rb b/lib/mindee/v2/product/crop/params/crop_parameters.rb index 82a0288a..49fafc3a 100644 --- a/lib/mindee/v2/product/crop/params/crop_parameters.rb +++ b/lib/mindee/v2/product/crop/params/crop_parameters.rb @@ -5,37 +5,38 @@ module V2 module Product module Crop module Params - class CropParameters < Input::BaseParameters - # Parameters accepted by the crop utility v2 endpoint. - class CropParameters < Mindee::Input::BaseParameters - @_slug = 'crop' + # Parameters accepted by the crop utility v2 endpoint. + class CropParameters < Mindee::Input::BaseParameters + # @return [String] Slug for the endpoint. + def self._slug + 'crop' + end - # @param [String] model_id ID of the model - # @param [String, nil] file_alias File alias, if applicable. - # @param [Array, nil] webhook_ids List of webhook IDs to propagate the API response to. - # @param [Hash, nil] polling_options Options for polling. Set only if having timeout issues. - # @param [Boolean, nil] close_file Whether to close the file after parsing. - def initialize( - model_id, - file_alias: nil, - webhook_ids: nil, - polling_options: nil, - close_file: true - ) - super - end + # @param [String] model_id ID of the model + # @param [String, nil] file_alias File alias, if applicable. + # @param [Array, nil] webhook_ids List of webhook IDs to propagate the API response to. + # @param [Hash, nil] polling_options Options for polling. Set only if having timeout issues. + # @param [Boolean, nil] close_file Whether to close the file after parsing. + def initialize( + model_id, + file_alias: nil, + webhook_ids: nil, + polling_options: nil, + close_file: true + ) + super + end - # Loads the parameters from a Hash. - # @param [Hash] params Parameters to provide as a hash. 
- # @return [CropParameters] - def self.from_hash(params: {}) - CropParameters.new( - params.fetch(:model_id), - file_alias: params.fetch(:file_alias, nil), - webhook_ids: params.fetch(:webhook_ids, nil), - close_file: params.fetch(:close_file, true) - ) - end + # Loads the parameters from a Hash. + # @param [Hash] params Parameters to provide as a hash. + # @return [CropParameters] + def self.from_hash(params: {}) + CropParameters.new( + params.fetch(:model_id), + file_alias: params.fetch(:file_alias, nil), + webhook_ids: params.fetch(:webhook_ids, nil), + close_file: params.fetch(:close_file, true) + ) end end end diff --git a/sig/mindee/client_v2.rbs b/sig/mindee/client_v2.rbs index 42dd931a..b7bff121 100644 --- a/sig/mindee/client_v2.rbs +++ b/sig/mindee/client_v2.rbs @@ -8,11 +8,11 @@ module Mindee class ClientV2 attr_reader mindee_api: HTTP::MindeeApiV2 - def logger: () -> Logger def initialize: (?api_key: String) -> void def get_inference: (String) -> Mindee::Parsing::V2::InferenceResponse def get_result: [T] (HTTP::_ResponseFactory[T] response_class, String inference_id) -> T + def get_result_url: [T] (HTTP::_ResponseFactory[T] response_class, String inference_id) -> T def get_job: (String) -> Mindee::Parsing::V2::JobResponse def enqueue_inference: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | Input::InferenceParameters, disable_redundant_warnings: bool) -> Mindee::Parsing::V2::JobResponse def enqueue: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Input::BaseParameters) -> Mindee::Parsing::V2::JobResponse diff --git a/sig/mindee/v2/product/crop/params/crop_parameters/crop_parameters.rbs b/sig/mindee/v2/product/crop/params/crop_parameters/crop_parameters.rbs index 0b451590..8e7a42a5 100644 --- a/sig/mindee/v2/product/crop/params/crop_parameters/crop_parameters.rbs +++ b/sig/mindee/v2/product/crop/params/crop_parameters/crop_parameters.rbs @@ -3,20 +3,18 @@ module Mindee module Product 
module Crop module Params - module CropParameters - class CropParameters - self.@_slug: String + class CropParameters + def self._slug: -> String - def self.from_hash: (params: Hash[String | Symbol, untyped]) -> CropParameters + def self.from_hash: (params: Hash[String | Symbol, untyped]) -> CropParameters - def initialize: ( - String, - ?file_alias: String?, - ?webhook_ids: Array[String]?, - ?polling_options: Hash[Symbol | String, untyped] | Input::PollingOptions?, - ?close_file: bool? - ) -> void - end + def initialize: ( + String, + ?file_alias: String?, + ?webhook_ids: Array[String]?, + ?polling_options: Hash[Symbol | String, untyped] | Input::PollingOptions?, + ?close_file: bool? + ) -> void end end end From 2a42b4fac73aacc87d8230bc856ad9bd4ad59e3e Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Wed, 25 Feb 2026 18:43:26 +0100 Subject: [PATCH 13/23] fix typos --- .../params/classification_parameters.rb | 59 ++++++++++--------- .../v2/product/ocr/params/ocr_parameters.rb | 59 ++++++++++--------- .../product/split/params/split_parameters.rb | 59 ++++++++++--------- .../classification_parameters.rbs | 22 ++++--- .../params/ocr_parameters/ocr_parameters.rbs | 23 ++++---- .../split_parameters/split_parameters.rbs | 22 ++++--- 6 files changed, 121 insertions(+), 123 deletions(-) diff --git a/lib/mindee/v2/product/classification/params/classification_parameters.rb b/lib/mindee/v2/product/classification/params/classification_parameters.rb index 4b2b1146..d4ff6ae6 100644 --- a/lib/mindee/v2/product/classification/params/classification_parameters.rb +++ b/lib/mindee/v2/product/classification/params/classification_parameters.rb @@ -5,37 +5,38 @@ module V2 module Product module Classification module Params - class ClassificationParameters < Input::BaseParameters - # Parameters accepted by the classification utility v2 endpoint. 
- class ClassificationParameters < Mindee::Input::BaseParameters - @_slug = 'classification' + # Parameters accepted by the classification utility v2 endpoint. + class ClassificationParameters < Mindee::Input::BaseParameters + # @return [String] Slug for the endpoint. + def self._slug + 'classification' + end - # @param [String] model_id ID of the model - # @param [String, nil] file_alias File alias, if applicable. - # @param [Array, nil] webhook_ids List of webhook IDs to propagate the API response to. - # @param [Hash, nil] polling_options Options for polling. Set only if having timeout issues. - # @param [Boolean, nil] close_file Whether to close the file after parsing. - def initialize( - model_id, - file_alias: nil, - webhook_ids: nil, - polling_options: nil, - close_file: true - ) - super - end + # @param [String] model_id ID of the model + # @param [String, nil] file_alias File alias, if applicable. + # @param [Array, nil] webhook_ids List of webhook IDs to propagate the API response to. + # @param [Hash, nil] polling_options Options for polling. Set only if having timeout issues. + # @param [Boolean, nil] close_file Whether to close the file after parsing. + def initialize( + model_id, + file_alias: nil, + webhook_ids: nil, + polling_options: nil, + close_file: true + ) + super + end - # Loads the parameters from a Hash. - # @param [Hash] params Parameters to provide as a hash. - # @return [ClassificationParameters] - def self.from_hash(params: {}) - ClassificationParameters.new( - params.fetch(:model_id), - file_alias: params.fetch(:file_alias, nil), - webhook_ids: params.fetch(:webhook_ids, nil), - close_file: params.fetch(:close_file, true) - ) - end + # Loads the parameters from a Hash. + # @param [Hash] params Parameters to provide as a hash. 
+ # @return [ClassificationParameters] + def self.from_hash(params: {}) + ClassificationParameters.new( + params.fetch(:model_id), + file_alias: params.fetch(:file_alias, nil), + webhook_ids: params.fetch(:webhook_ids, nil), + close_file: params.fetch(:close_file, true) + ) end end end diff --git a/lib/mindee/v2/product/ocr/params/ocr_parameters.rb b/lib/mindee/v2/product/ocr/params/ocr_parameters.rb index 4544a56d..e2d56eb6 100644 --- a/lib/mindee/v2/product/ocr/params/ocr_parameters.rb +++ b/lib/mindee/v2/product/ocr/params/ocr_parameters.rb @@ -5,37 +5,38 @@ module V2 module Product module Ocr module Params - class OcrParameters < Input::BaseParameters - # Parameters accepted by the ocr utility v2 endpoint. - class OcrParameters < Mindee::Input::BaseParameters - @_slug = 'ocr' + # Parameters accepted by the ocr utility v2 endpoint. + class OcrParameters < Mindee::Input::BaseParameters + # @return [String] Slug for the endpoint. + def self._slug + 'ocr' + end - # @param [String] model_id ID of the model - # @param [String, nil] file_alias File alias, if applicable. - # @param [Array, nil] webhook_ids List of webhook IDs to propagate the API response to. - # @param [Hash, nil] polling_options Options for polling. Set only if having timeout issues. - # @param [Boolean, nil] close_file Whether to close the file after parsing. - def initialize( - model_id, - file_alias: nil, - webhook_ids: nil, - polling_options: nil, - close_file: true - ) - super - end + # @param [String] model_id ID of the model + # @param [String, nil] file_alias File alias, if applicable. + # @param [Array, nil] webhook_ids List of webhook IDs to propagate the API response to. + # @param [Hash, nil] polling_options Options for polling. Set only if having timeout issues. + # @param [Boolean, nil] close_file Whether to close the file after parsing. 
+ def initialize( + model_id, + file_alias: nil, + webhook_ids: nil, + polling_options: nil, + close_file: true + ) + super + end - # Loads the parameters from a Hash. - # @param [Hash] params Parameters to provide as a hash. - # @return [OcrParameters] - def self.from_hash(params: {}) - OcrParameters.new( - params.fetch(:model_id), - file_alias: params.fetch(:file_alias, nil), - webhook_ids: params.fetch(:webhook_ids, nil), - close_file: params.fetch(:close_file, true) - ) - end + # Loads the parameters from a Hash. + # @param [Hash] params Parameters to provide as a hash. + # @return [OcrParameters] + def self.from_hash(params: {}) + OcrParameters.new( + params.fetch(:model_id), + file_alias: params.fetch(:file_alias, nil), + webhook_ids: params.fetch(:webhook_ids, nil), + close_file: params.fetch(:close_file, true) + ) end end end diff --git a/lib/mindee/v2/product/split/params/split_parameters.rb b/lib/mindee/v2/product/split/params/split_parameters.rb index a82a1581..c290e3ba 100644 --- a/lib/mindee/v2/product/split/params/split_parameters.rb +++ b/lib/mindee/v2/product/split/params/split_parameters.rb @@ -5,37 +5,38 @@ module V2 module Product module Split module Params - class SplitParameters < Input::BaseParameters - # Parameters accepted by the split utility v2 endpoint. - class SplitParameters < Mindee::Input::BaseParameters - @_slug = 'split' + # Parameters accepted by the split utility v2 endpoint. + class SplitParameters < Mindee::Input::BaseParameters + # @return [String] Slug for the endpoint. + def self._slug + 'split' + end - # @param [String] model_id ID of the model - # @param [String, nil] file_alias File alias, if applicable. - # @param [Array, nil] webhook_ids List of webhook IDs to propagate the API response to. - # @param [Hash, nil] polling_options Options for polling. Set only if having timeout issues. - # @param [Boolean, nil] close_file Whether to close the file after parsing. 
- def initialize( - model_id, - file_alias: nil, - webhook_ids: nil, - polling_options: nil, - close_file: true - ) - super - end + # @param [String] model_id ID of the model + # @param [String, nil] file_alias File alias, if applicable. + # @param [Array, nil] webhook_ids List of webhook IDs to propagate the API response to. + # @param [Hash, nil] polling_options Options for polling. Set only if having timeout issues. + # @param [Boolean, nil] close_file Whether to close the file after parsing. + def initialize( + model_id, + file_alias: nil, + webhook_ids: nil, + polling_options: nil, + close_file: true + ) + super + end - # Loads the parameters from a Hash. - # @param [Hash] params Parameters to provide as a hash. - # @return [SplitParameters] - def self.from_hash(params: {}) - SplitParameters.new( - params.fetch(:model_id), - file_alias: params.fetch(:file_alias, nil), - webhook_ids: params.fetch(:webhook_ids, nil), - close_file: params.fetch(:close_file, true) - ) - end + # Loads the parameters from a Hash. + # @param [Hash] params Parameters to provide as a hash. 
+ # @return [SplitParameters] + def self.from_hash(params: {}) + SplitParameters.new( + params.fetch(:model_id), + file_alias: params.fetch(:file_alias, nil), + webhook_ids: params.fetch(:webhook_ids, nil), + close_file: params.fetch(:close_file, true) + ) end end end diff --git a/sig/mindee/v2/product/classification/params/classification_parameters/classification_parameters.rbs b/sig/mindee/v2/product/classification/params/classification_parameters/classification_parameters.rbs index 662a52a8..113e22e7 100644 --- a/sig/mindee/v2/product/classification/params/classification_parameters/classification_parameters.rbs +++ b/sig/mindee/v2/product/classification/params/classification_parameters/classification_parameters.rbs @@ -3,20 +3,18 @@ module Mindee module Product module Classification module Params - module ClassificationParameters - class ClassificationParameters - self.@_slug: String + class ClassificationParameters + def self._slug: -> String - def self.from_hash: (params: Hash[String | Symbol, untyped]) -> ClassificationParameters + def self.from_hash: (params: Hash[String | Symbol, untyped]) -> ClassificationParameters - def initialize: ( - String, - ?file_alias: String?, - ?webhook_ids: Array[String]?, - ?polling_options: Hash[Symbol | String, untyped] | Input::PollingOptions?, - ?close_file: bool? - ) -> void - end + def initialize: ( + String, + ?file_alias: String?, + ?webhook_ids: Array[String]?, + ?polling_options: Hash[Symbol | String, untyped] | Input::PollingOptions?, + ?close_file: bool? 
+ ) -> void end end end diff --git a/sig/mindee/v2/product/ocr/params/ocr_parameters/ocr_parameters.rbs b/sig/mindee/v2/product/ocr/params/ocr_parameters/ocr_parameters.rbs index 18732072..b33cbaa2 100644 --- a/sig/mindee/v2/product/ocr/params/ocr_parameters/ocr_parameters.rbs +++ b/sig/mindee/v2/product/ocr/params/ocr_parameters/ocr_parameters.rbs @@ -3,21 +3,20 @@ module Mindee module Product module Ocr module Params - module OcrParameters - class OcrParameters - self.@_slug: String + class OcrParameters + def self._slug: -> String - def self.from_hash: (params: Hash[String | Symbol, untyped]) -> OcrParameters + def self.from_hash: (params: Hash[String | Symbol, untyped]) -> OcrParameters - def initialize: ( - String, - ?file_alias: String?, - ?webhook_ids: Array[String]?, - ?polling_options: Hash[Symbol | String, untyped] | Input::PollingOptions?, - ?close_file: bool? - ) -> void - end + def initialize: ( + String, + ?file_alias: String?, + ?webhook_ids: Array[String]?, + ?polling_options: Hash[Symbol | String, untyped] | Input::PollingOptions?, + ?close_file: bool? 
+ ) -> void end + end end end diff --git a/sig/mindee/v2/product/split/params/split_parameters/split_parameters.rbs b/sig/mindee/v2/product/split/params/split_parameters/split_parameters.rbs index 57f97862..a17053af 100644 --- a/sig/mindee/v2/product/split/params/split_parameters/split_parameters.rbs +++ b/sig/mindee/v2/product/split/params/split_parameters/split_parameters.rbs @@ -3,20 +3,18 @@ module Mindee module Product module Split module Params - module SplitParameters - class SplitParameters - self.@_slug: String + class SplitParameters + def self._slug: -> String - def self.from_hash: (params: Hash[String | Symbol, untyped]) -> SplitParameters + def self.from_hash: (params: Hash[String | Symbol, untyped]) -> SplitParameters - def initialize: ( - String, - ?file_alias: String?, - ?webhook_ids: Array[String]?, - ?polling_options: Hash[Symbol | String, untyped] | Input::PollingOptions?, - ?close_file: bool? - ) -> void - end + def initialize: ( + String, + ?file_alias: String?, + ?webhook_ids: Array[String]?, + ?polling_options: Hash[Symbol | String, untyped] | Input::PollingOptions?, + ?close_file: bool? + ) -> void end end end From f7ece5f7ea790dcd64ba43e96c83e131cbfee5d1 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Thu, 26 Feb 2026 10:32:21 +0100 Subject: [PATCH 14/23] fix job polling & accessing --- lib/mindee/client_v2.rb | 26 +++++++++----------------- lib/mindee/http/mindee_api_v2.rb | 28 +++++++++++++++++++++------- sig/mindee/http/mindee_api_v2.rbs | 7 ++++--- 3 files changed, 34 insertions(+), 27 deletions(-) diff --git a/lib/mindee/client_v2.rb b/lib/mindee/client_v2.rb index 8626c707..1c34ccd0 100644 --- a/lib/mindee/client_v2.rb +++ b/lib/mindee/client_v2.rb @@ -26,27 +26,19 @@ def get_inference(inference_id) @mindee_api.req_get_inference(inference_id) end - # Retrieves a result from a given queue. 
- # @param inference_id [String] - # @param response_class [Class] - # @return [Mindee::Parsing::V2::BaseResponse] - def get_result(response_class, inference_id) - @mindee_api.req_get_result(response_class, inference_id) - end - - # Retrieves a result from a given URL. - # @param url [String] + # Retrieves a result from a given queue or URL to the result. + # @param resource [String] ID of the inference or URL to the result. # @param response_class [Class] # @return [Mindee::Parsing::V2::BaseResponse] - def get_result_url(response_class, url) - @mindee_api.req_get_result_url(response_class, url) + def get_result(response_class, resource) + @mindee_api.req_get_result(response_class, resource) end - # Retrieves an inference. - # @param job_id [String] + # Retrieves a job from a given job ID or URL to the job. + # @param resource [String] ID of the job or URL to the job. # @return [Mindee::Parsing::V2::JobResponse] - def get_job(job_id) - @mindee_api.req_get_job(job_id) + def get_job(resource) + @mindee_api.req_get_job(resource) end # Enqueue a document for async parsing. @@ -105,7 +97,7 @@ def enqueue_and_get_result( if poll_results.job.status == 'Failed' break elsif !poll_results.job.result_url.nil? - return get_result_url(response_type, poll_results.job.result_url) + return get_result(response_type, poll_results.job.result_url) end logger.debug( diff --git a/lib/mindee/http/mindee_api_v2.rb b/lib/mindee/http/mindee_api_v2.rb index 21e487c7..0393b14c 100644 --- a/lib/mindee/http/mindee_api_v2.rb +++ b/lib/mindee/http/mindee_api_v2.rb @@ -42,12 +42,14 @@ def req_get_inference(inference_id) # Retrieves a result from a given queue. # @param response_class [Class] - # @param inference_id [String] + # @param resource [String] ID of the inference or URL to the result.
# @return [Mindee::Parsing::V2::BaseResponse] - def req_get_result(response_class, inference_id) + def req_get_result(response_class, resource) + return req_get_result_url(response_class, resource) if uri?(resource) + @settings.check_api_key response = result_req_get( - inference_id, + resource, response_class ) response_class.new(process_response(response)) @@ -55,16 +57,20 @@ def req_get_result(response_class, inference_id) # Retrieves a queued job. # - # @param job_id [String] + # @param resource [String] ID of the job or URL to the job. # @return [Mindee::Parsing::V2::JobResponse] - def req_get_job(job_id) + def req_get_job(resource) + return req_get_job_url(resource) if uri?(resource) + @settings.check_api_key response = inference_job_req_get( - job_id + resource ) Mindee::Parsing::V2::JobResponse.new(process_response(response)) end + private + # Retrieves a queued job. # # @param url [String] @@ -86,7 +92,15 @@ def req_get_result_url(result_class, url) result_class.new(process_response(response)) end - private + # @param resource [String] Resource to check. + # @return [Boolean] + def uri?(resource) + uri = URI.parse(resource) + raise Mindee::Errors::MindeeError, 'HTTP is not supported.' if uri.scheme == 'http' + uri.scheme == 'https' + rescue URI::BadURIError, URI::InvalidURIError + false + end # Converts an HTTP response to a parsed response object. # diff --git a/sig/mindee/http/mindee_api_v2.rbs b/sig/mindee/http/mindee_api_v2.rbs index d08effbe..60bb1269 100644 --- a/sig/mindee/http/mindee_api_v2.rbs +++ b/sig/mindee/http/mindee_api_v2.rbs @@ -11,9 +11,7 @@ module Mindee def initialize: (?api_key: String?)
-> void - def req_get_job_url: (String) -> Parsing::V2::JobResponse def req_get_result: [T] (_ResponseFactory[T] response_class, String inference_id) -> T - def req_get_result_url: [T] (_ResponseFactory[T] result_class, String url) -> T def req_post_enqueue: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Input::BaseParameters) -> Parsing::V2::JobResponse def req_get_inference: (String) -> Parsing::V2::InferenceResponse @@ -25,8 +23,11 @@ module Mindee def result_req_get: [T] (String, _ResponseFactory[T] result_class) -> Net::HTTPResponse def enqueue: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Input::BaseParameters) -> Net::HTTPResponse? - private + private + def req_get_job_url: (String) -> Parsing::V2::JobResponse + def req_get_result_url: [T] (_ResponseFactory[T] result_class, String url) -> T + def uri?: (String) -> bool def enqueue_form_options: (Array[untyped], Input::InferenceParameters) -> Array[untyped] end From ab29344bf725ba8410b91ad1487a727f98d94fed Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Thu, 26 Feb 2026 11:42:02 +0100 Subject: [PATCH 15/23] add code samples + fix url access --- .github/workflows/_test-code-samples.yml | 10 +++- .github/workflows/_test-integrations.yml | 2 +- .gitignore | 2 + .rubocop.yml | 2 +- docs/code_samples/v2_classification.txt | 32 ++++++++++ docs/code_samples/v2_crop.txt | 32 ++++++++++ .../{default_v2.txt => v2_default.txt} | 0 docs/code_samples/v2_default_webhook.txt | 43 +++++++++++++ docs/code_samples/v2_ocr.txt | 32 ++++++++++ docs/code_samples/v2_split.txt | 32 ++++++++++ ...ode_samples.sh => test_code_samples_v1.sh} | 14 +---- spec/test_code_samples_v2.sh | 60 +++++++++++++++++++ 12 files changed, 245 insertions(+), 16 deletions(-) create mode 100644 docs/code_samples/v2_classification.txt create mode 100644 docs/code_samples/v2_crop.txt rename docs/code_samples/{default_v2.txt => v2_default.txt} (100%) create mode 
100644 docs/code_samples/v2_default_webhook.txt create mode 100644 docs/code_samples/v2_ocr.txt create mode 100644 docs/code_samples/v2_split.txt rename spec/{test_code_samples.sh => test_code_samples_v1.sh} (79%) create mode 100755 spec/test_code_samples_v2.sh diff --git a/.github/workflows/_test-code-samples.yml b/.github/workflows/_test-code-samples.yml index f00db7e3..36913bdf 100644 --- a/.github/workflows/_test-code-samples.yml +++ b/.github/workflows/_test-code-samples.yml @@ -28,8 +28,14 @@ jobs: ruby-version: ${{ matrix.ruby }} bundler-cache: true - - name: Tests code samples + - name: Tests V1 code samples env: MINDEE_LOG_LEVEL: DEBUG run: | - ./spec/test_code_samples.sh ${{ secrets.MINDEE_ACCOUNT_SE_TESTS }} ${{ secrets.MINDEE_ENDPOINT_SE_TESTS }} ${{ secrets.MINDEE_API_KEY_SE_TESTS }} ${{ secrets.MINDEE_V2_SE_TESTS_API_KEY }} ${{ secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }} {{secrets.MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID}} + ./spec/test_code_samples_v1.sh ${{ secrets.MINDEE_ACCOUNT_SE_TESTS }} ${{ secrets.MINDEE_ENDPOINT_SE_TESTS }} ${{ secrets.MINDEE_API_KEY_SE_TESTS }} ${{ secrets.MINDEE_V2_SE_TESTS_API_KEY }} + + - name: Tests V2 code samples + env: + MINDEE_LOG_LEVEL: DEBUG + run: | + ./spec/test_code_samples_v2.sh ${{ secrets.MINDEE_V2_SE_TESTS_API_KEY }} ${{ secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }} ${{ secrets.MINDEE_V2_SE_TESTS_FAILURE_WEBHOOK_ID }} ${{ secrets.MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID }} ${{ secrets.MINDEE_V2_SE_TESTS_CROP_MODEL_ID }} ${{ secrets.MINDEE_V2_SE_TESTS_OCR_MODEL_ID }} ${{ secrets.MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID }} diff --git a/.github/workflows/_test-integrations.yml b/.github/workflows/_test-integrations.yml index 2c2fbebc..c62ac12d 100644 --- a/.github/workflows/_test-integrations.yml +++ b/.github/workflows/_test-integrations.yml @@ -58,8 +58,8 @@ jobs: MINDEE_V2_SE_TESTS_BLANK_PDF_URL: ${{ secrets.MINDEE_V2_SE_TESTS_BLANK_PDF_URL }} MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID: ${{ 
secrets.MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID }} MINDEE_V2_SE_TESTS_CROP_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_CROP_MODEL_ID }} - MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID }} MINDEE_V2_SE_TESTS_OCR_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_OCR_MODEL_ID }} + MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID }} MINDEE_LOG_LEVEL: DEBUG run: | bundle exec rake integration diff --git a/.gitignore b/.gitignore index 01872a55..d37f6b50 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,8 @@ /test/version_tmp/ /tmp/ _test.rb +_test_v1.rb +_test_v2.rb /vendor /mindee-*/ local-test diff --git a/.rubocop.yml b/.rubocop.yml index 7b56d374..fbb91a84 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -6,7 +6,7 @@ AllCops: - 'tmp/**/*' - '.git/**/*' - 'bin/*' - - _test.rb + - _test*.rb - local_test/* - Steepfile diff --git a/docs/code_samples/v2_classification.txt b/docs/code_samples/v2_classification.txt new file mode 100644 index 00000000..d25a58d8 --- /dev/null +++ b/docs/code_samples/v2_classification.txt @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +require 'mindee' +require 'mindee/v2/product' + +include Mindee::V2::Product::Classification + +input_path = '/path/to/the/file.ext' +api_key = 'MY_API_KEY' +model_id = 'MY_MODEL_ID' + +# Init a new client +mindee_client = Mindee::ClientV2.new(api_key: api_key) + +# Set inference parameters +classification_params = Params::ClassificationParameters.new( + # ID of the model, required. 
+ model_id, +) + +# Load a file from disk +input_source = Mindee::Input::Source::PathInputSource.new(input_path) + +# Send for processing +response = mindee_client.enqueue_and_get_result( + ClassificationResponse, + input_source, + classification_params +) + +# Print a brief summary of the parsed data +puts response.inference diff --git a/docs/code_samples/v2_crop.txt b/docs/code_samples/v2_crop.txt new file mode 100644 index 00000000..8ddb921e --- /dev/null +++ b/docs/code_samples/v2_crop.txt @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +require 'mindee' +require 'mindee/v2/product' + +include Mindee::V2::Product::Crop + +input_path = '/path/to/the/file.ext' +api_key = 'MY_API_KEY' +model_id = 'MY_MODEL_ID' + +# Init a new client +mindee_client = Mindee::ClientV2.new(api_key: api_key) + +# Set inference parameters +crop_params = Params::CropParameters.new( + # ID of the model, required. + model_id, +) + +# Load a file from disk +input_source = Mindee::Input::Source::PathInputSource.new(input_path) + +# Send for processing +response = mindee_client.enqueue_and_get_result( + CropResponse, + input_source, + crop_params +) + +# Print a brief summary of the parsed data +puts response.inference diff --git a/docs/code_samples/default_v2.txt b/docs/code_samples/v2_default.txt similarity index 100% rename from docs/code_samples/default_v2.txt rename to docs/code_samples/v2_default.txt diff --git a/docs/code_samples/v2_default_webhook.txt b/docs/code_samples/v2_default_webhook.txt new file mode 100644 index 00000000..c09ad207 --- /dev/null +++ b/docs/code_samples/v2_default_webhook.txt @@ -0,0 +1,43 @@ +# frozen_string_literal: true + +require 'mindee' + +input_path = '/path/to/the/file.ext' +api_key = 'MY_API_KEY' +model_id = 'MY_MODEL_ID' + +# Init a new client +mindee_client = Mindee::ClientV2.new(api_key: api_key) + +# Set inference parameters +inference_params = Mindee::Input::InferenceParameters.new( + # ID of the model, required. 
+ model_id, + # Add any number of webhook IDs here. + webhook_ids: ["MY_WEBHOOK_ID"], + # Options: set to `true` or `false` to override defaults + + # Enhance extraction accuracy with Retrieval-Augmented Generation. + rag: nil, + # Extract the full text content from the document as strings. + raw_text: nil, + # Calculate bounding box polygons for all fields. + polygon: nil, + # Boost the precision and accuracy of all extractions. + # Calculate confidence scores for all fields. + confidence: nil +) + +# Load a file from disk +input_source = Mindee::Input::Source::PathInputSource.new(input_path) + +# Send for processing +response = mindee_client.enqueue_inference( + input_source, + inference_params # This parameter can also be provided as a Hash. +) + +# Print the job ID +job_id = response.job.id + +puts job_id diff --git a/docs/code_samples/v2_ocr.txt b/docs/code_samples/v2_ocr.txt new file mode 100644 index 00000000..94d4fb85 --- /dev/null +++ b/docs/code_samples/v2_ocr.txt @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +require 'mindee' +require 'mindee/v2/product' + +include Mindee::V2::Product::Ocr + +input_path = '/path/to/the/file.ext' +api_key = 'MY_API_KEY' +model_id = 'MY_MODEL_ID' + +# Init a new client +mindee_client = Mindee::ClientV2.new(api_key: api_key) + +# Set inference parameters +ocr_params = Params::OcrParameters.new( + # ID of the model, required. 
+ model_id, +) + +# Load a file from disk +input_source = Mindee::Input::Source::PathInputSource.new(input_path) + +# Send for processing +response = mindee_client.enqueue_and_get_result( + OcrResponse, + input_source, + ocr_params +) + +# Print a brief summary of the parsed data +puts response.inference diff --git a/docs/code_samples/v2_split.txt b/docs/code_samples/v2_split.txt new file mode 100644 index 00000000..ff649a93 --- /dev/null +++ b/docs/code_samples/v2_split.txt @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +require 'mindee' +require 'mindee/v2/product' + +include Mindee::V2::Product::Split + +input_path = '/path/to/the/file.ext' +api_key = 'MY_API_KEY' +model_id = 'MY_MODEL_ID' + +# Init a new client +mindee_client = Mindee::ClientV2.new(api_key: api_key) + +# Set inference parameters +split_params = Params::SplitParameters.new( + # ID of the model, required. + model_id, +) + +# Load a file from disk +input_source = Mindee::Input::Source::PathInputSource.new(input_path) + +# Send for processing +response = mindee_client.enqueue_and_get_result( + SplitResponse, + input_source, + split_params +) + +# Print a brief summary of the parsed data +puts response.inference diff --git a/spec/test_code_samples.sh b/spec/test_code_samples_v1.sh similarity index 79% rename from spec/test_code_samples.sh rename to spec/test_code_samples_v1.sh index 5bce3950..8a703210 100755 --- a/spec/test_code_samples.sh +++ b/spec/test_code_samples_v1.sh @@ -1,17 +1,15 @@ #! 
/bin/sh set -e -OUTPUT_FILE='./_test.rb' +OUTPUT_FILE='./_test_v1.rb' ACCOUNT=$1 ENDPOINT=$2 API_KEY=$3 -API_KEY_V2=$4 -MODEL_ID=$5 if [ -z "${ACCOUNT}" ]; then echo "ACCOUNT is required"; exit 1; fi if [ -z "${ENDPOINT}" ]; then echo "ENDPOINT is required"; exit 1; fi -for f in $(find ./docs/code_samples -maxdepth 1 -name "*.txt" -not -name "workflow_*.txt" | sort -h) +for f in $(find ./docs/code_samples -maxdepth 1 -name "*.txt" -not -name "workflow_*.txt" -not -name "v2_*" | sort -h) do echo echo "###############################################" @@ -22,14 +20,6 @@ do sed "s/my-api-key/${API_KEY}/" "$f" > $OUTPUT_FILE sed -i 's/\/path\/to\/the\/file.ext/.\/spec\/data\/file_types\/pdf\/blank_1.pdf/' $OUTPUT_FILE - if echo "${f}" | grep -q "default_v2.txt" - then - sed -i "s/MY_API_KEY/$API_KEY_V2/" $OUTPUT_FILE - sed -i "s/MY_MODEL_ID/$MODEL_ID/" $OUTPUT_FILE - else - sed -i "s/my-api-key/$API_KEY/" $OUTPUT_FILE - fi - if echo "$f" | grep -q "custom_v1.txt" then sed -i "s/my-account/$ACCOUNT/g" $OUTPUT_FILE diff --git a/spec/test_code_samples_v2.sh b/spec/test_code_samples_v2.sh new file mode 100755 index 00000000..cd6b95be --- /dev/null +++ b/spec/test_code_samples_v2.sh @@ -0,0 +1,60 @@ +#! 
/bin/sh +set -e + +OUTPUT_FILE='./_test_v2.rb' +API_KEY_V2=$1 +FINDOC_MODEL_ID=$2 +WEBHOOK_ID=$3 +CLASSIFICATION_MODEL_ID=$4 +CROP_MODEL_ID=$5 +OCR_MODEL_ID=$6 +SPLIT_MODEL_ID=$7 + +if [ -z "${FINDOC_MODEL_ID}" ]; then echo "FINDOC_MODEL_ID is required"; exit 1; fi +if [ -z "${WEBHOOK_ID}" ]; then echo "WEBHOOK_ID is required"; exit 1; fi + +for f in $(find ./docs/code_samples -maxdepth 1 -name "v2_*.txt" | sort -h) +do + echo + echo "###############################################" + echo "${f}" + echo "###############################################" + echo + + cat "${f}" > $OUTPUT_FILE + sed -i "s/MY_API_KEY/$API_KEY_V2/" $OUTPUT_FILE + sed -i "s/MY_WEBHOOK_ID/$WEBHOOK_ID/" $OUTPUT_FILE + sed -i 's/\/path\/to\/the\/file.ext/.\/spec\/data\/file_types\/pdf\/blank_1.pdf/' $OUTPUT_FILE + + if echo "${f}" | grep -q "v2_default.txt" + then + sed -i "s/MY_MODEL_ID/$FINDOC_MODEL_ID/" $OUTPUT_FILE + fi + + if echo "${f}" | grep -q "v2_default_webhook.txt" + then + sed -i "s/MY_MODEL_ID/$FINDOC_MODEL_ID/" $OUTPUT_FILE + fi + + if echo "${f}" | grep -q "v2_classification.txt" + then + sed -i "s/MY_MODEL_ID/$CLASSIFICATION_MODEL_ID/" $OUTPUT_FILE + fi + + if echo "${f}" | grep -q "v2_crop.txt" + then + sed -i "s/MY_MODEL_ID/$CROP_MODEL_ID/" $OUTPUT_FILE + fi + + if echo "${f}" | grep -q "v2_ocr.txt" + then + sed -i "s/MY_MODEL_ID/$OCR_MODEL_ID/" $OUTPUT_FILE + fi + + if echo "${f}" | grep -q "v2_split.txt" + then + sed -i "s/MY_MODEL_ID/$SPLIT_MODEL_ID/" $OUTPUT_FILE + fi + + bundle exec ruby $OUTPUT_FILE +done From afdcace8dc6d40e7fbb9f8141e0332bf2e00a1d8 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Thu, 26 Feb 2026 12:19:35 +0100 Subject: [PATCH 16/23] fix code samples + add env vars --- .github/workflows/_test-code-samples.yml | 16 ++++++++++-- docs/code_samples/v2_classification.txt | 4 +-- docs/code_samples/v2_crop.txt | 4 +-- .../{v2_default.txt => v2_extraction.txt} | 0 ..._webhook.txt => 
v2_extraction_webhook.txt} | 0 docs/code_samples/v2_ocr.txt | 4 +-- docs/code_samples/v2_split.txt | 4 +-- spec/test_code_samples_v1.sh | 17 +++++------- spec/test_code_samples_v2.sh | 26 ++++++------------- 9 files changed, 37 insertions(+), 38 deletions(-) rename docs/code_samples/{v2_default.txt => v2_extraction.txt} (100%) rename docs/code_samples/{v2_default_webhook.txt => v2_extraction_webhook.txt} (100%) diff --git a/.github/workflows/_test-code-samples.yml b/.github/workflows/_test-code-samples.yml index 36913bdf..81e76f0b 100644 --- a/.github/workflows/_test-code-samples.yml +++ b/.github/workflows/_test-code-samples.yml @@ -4,6 +4,18 @@ on: workflow_call: workflow_dispatch: +env: + MINDEE_ENDPOINT_SE_TESTS: ${{ secrets.MINDEE_ENDPOINT_SE_TESTS }} + MINDEE_ACCOUNT_SE_TESTS: ${{ secrets.MINDEE_ACCOUNT_SE_TESTS }} + MINDEE_API_KEY_SE_TESTS: ${{ secrets.MINDEE_API_KEY_SE_TESTS }} + MINDEE_V2_API_KEY: ${{ secrets.MINDEE_V2_SE_TESTS_API_KEY }} + MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }} + MINDEE_V2_SE_TESTS_FAILURE_WEBHOOK_ID: ${{ secrets.MINDEE_V2_SE_TESTS_FAILURE_WEBHOOK_ID }} + MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID }} + MINDEE_V2_SE_TESTS_CROP_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_CROP_MODEL_ID }} + MINDEE_V2_SE_TESTS_OCR_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_OCR_MODEL_ID }} + MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID }} + jobs: test: name: Run Tests @@ -32,10 +44,10 @@ jobs: env: MINDEE_LOG_LEVEL: DEBUG run: | - ./spec/test_code_samples_v1.sh ${{ secrets.MINDEE_ACCOUNT_SE_TESTS }} ${{ secrets.MINDEE_ENDPOINT_SE_TESTS }} ${{ secrets.MINDEE_API_KEY_SE_TESTS }} ${{ secrets.MINDEE_V2_SE_TESTS_API_KEY }} + ./spec/test_code_samples_v1.sh - name: Tests V2 code samples env: MINDEE_LOG_LEVEL: DEBUG run: | - ./spec/test_code_samples_v2.sh ${{ secrets.MINDEE_V2_SE_TESTS_API_KEY }} ${{ 
secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }} ${{ secrets.MINDEE_V2_SE_TESTS_FAILURE_WEBHOOK_ID }} ${{ secrets.MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID }} ${{ secrets.MINDEE_V2_SE_TESTS_CROP_MODEL_ID }} ${{ secrets.MINDEE_V2_SE_TESTS_OCR_MODEL_ID }} ${{ secrets.MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID }} + ./spec/test_code_samples_v2.sh diff --git a/docs/code_samples/v2_classification.txt b/docs/code_samples/v2_classification.txt index d25a58d8..518d3877 100644 --- a/docs/code_samples/v2_classification.txt +++ b/docs/code_samples/v2_classification.txt @@ -28,5 +28,5 @@ response = mindee_client.enqueue_and_get_result( classification_params ) -# Print a brief summary of the parsed data -puts response.inference +# Access the classification result +puts response.inference.result.classification diff --git a/docs/code_samples/v2_crop.txt b/docs/code_samples/v2_crop.txt index 8ddb921e..b73c7685 100644 --- a/docs/code_samples/v2_crop.txt +++ b/docs/code_samples/v2_crop.txt @@ -28,5 +28,5 @@ response = mindee_client.enqueue_and_get_result( crop_params ) -# Print a brief summary of the parsed data -puts response.inference +# Access the result crops +puts response.inference.result.crops \ No newline at end of file diff --git a/docs/code_samples/v2_default.txt b/docs/code_samples/v2_extraction.txt similarity index 100% rename from docs/code_samples/v2_default.txt rename to docs/code_samples/v2_extraction.txt diff --git a/docs/code_samples/v2_default_webhook.txt b/docs/code_samples/v2_extraction_webhook.txt similarity index 100% rename from docs/code_samples/v2_default_webhook.txt rename to docs/code_samples/v2_extraction_webhook.txt diff --git a/docs/code_samples/v2_ocr.txt b/docs/code_samples/v2_ocr.txt index 94d4fb85..0bac2f6a 100644 --- a/docs/code_samples/v2_ocr.txt +++ b/docs/code_samples/v2_ocr.txt @@ -28,5 +28,5 @@ response = mindee_client.enqueue_and_get_result( ocr_params ) -# Print a brief summary of the parsed data -puts response.inference +# Access the result OCR pages 
+puts response.inference.result.pages diff --git a/docs/code_samples/v2_split.txt b/docs/code_samples/v2_split.txt index ff649a93..3da0cc1f 100644 --- a/docs/code_samples/v2_split.txt +++ b/docs/code_samples/v2_split.txt @@ -28,5 +28,5 @@ response = mindee_client.enqueue_and_get_result( split_params ) -# Print a brief summary of the parsed data -puts response.inference +# Access the result splits +puts response.inference.result.splits diff --git a/spec/test_code_samples_v1.sh b/spec/test_code_samples_v1.sh index 8a703210..ecf2899e 100755 --- a/spec/test_code_samples_v1.sh +++ b/spec/test_code_samples_v1.sh @@ -2,12 +2,9 @@ set -e OUTPUT_FILE='./_test_v1.rb' -ACCOUNT=$1 -ENDPOINT=$2 -API_KEY=$3 -if [ -z "${ACCOUNT}" ]; then echo "ACCOUNT is required"; exit 1; fi -if [ -z "${ENDPOINT}" ]; then echo "ENDPOINT is required"; exit 1; fi +if [ -z "${MINDEE_ACCOUNT_SE_TESTS}" ]; then echo "MINDEE_ACCOUNT_SE_TESTS is required"; exit 1; fi +if [ -z "${MINDEE_ENDPOINT_SE_TESTS}" ]; then echo "MINDEE_ENDPOINT_SE_TESTS is required"; exit 1; fi for f in $(find ./docs/code_samples -maxdepth 1 -name "*.txt" -not -name "workflow_*.txt" -not -name "v2_*" | sort -h) do @@ -17,18 +14,18 @@ do echo "###############################################" echo - sed "s/my-api-key/${API_KEY}/" "$f" > $OUTPUT_FILE + sed "s/my-api-key/${MINDEE_API_KEY_SE_TESTS}/" "$f" > $OUTPUT_FILE sed -i 's/\/path\/to\/the\/file.ext/.\/spec\/data\/file_types\/pdf\/blank_1.pdf/' $OUTPUT_FILE if echo "$f" | grep -q "custom_v1.txt" then - sed -i "s/my-account/$ACCOUNT/g" $OUTPUT_FILE - sed -i "s/my-endpoint/$ENDPOINT/g" $OUTPUT_FILE + sed -i "s/my-account/$MINDEE_ACCOUNT_SE_TESTS/g" $OUTPUT_FILE + sed -i "s/my-endpoint/$MINDEE_ENDPOINT_SE_TESTS/g" $OUTPUT_FILE fi if echo "${f}" | grep -q "default.txt" then - sed -i "s/my-account/$ACCOUNT/g" $OUTPUT_FILE - sed -i "s/my-endpoint/$ENDPOINT/g" $OUTPUT_FILE + sed -i "s/my-account/$MINDEE_ACCOUNT_SE_TESTS/g" $OUTPUT_FILE + sed -i 
"s/my-endpoint/$MINDEE_ENDPOINT_SE_TESTS/g" $OUTPUT_FILE sed -i "s/my-version/1/g" $OUTPUT_FILE fi diff --git a/spec/test_code_samples_v2.sh b/spec/test_code_samples_v2.sh index cd6b95be..ae407f49 100755 --- a/spec/test_code_samples_v2.sh +++ b/spec/test_code_samples_v2.sh @@ -2,16 +2,6 @@ set -e OUTPUT_FILE='./_test_v2.rb' -API_KEY_V2=$1 -FINDOC_MODEL_ID=$2 -WEBHOOK_ID=$3 -CLASSIFICATION_MODEL_ID=$4 -CROP_MODEL_ID=$5 -OCR_MODEL_ID=$6 -SPLIT_MODEL_ID=$7 - -if [ -z "${FINDOC_MODEL_ID}" ]; then echo "FINDOC_MODEL_ID is required"; exit 1; fi -if [ -z "${WEBHOOK_ID}" ]; then echo "WEBHOOK_ID is required"; exit 1; fi for f in $(find ./docs/code_samples -maxdepth 1 -name "v2_*.txt" | sort -h) do @@ -22,38 +12,38 @@ do echo cat "${f}" > $OUTPUT_FILE - sed -i "s/MY_API_KEY/$API_KEY_V2/" $OUTPUT_FILE - sed -i "s/MY_WEBHOOK_ID/$WEBHOOK_ID/" $OUTPUT_FILE + sed -i "s/MY_API_KEY/${MINDEE_V2_API_KEY}/" $OUTPUT_FILE + sed -i "s/MY_WEBHOOK_ID/${MINDEE_V2_SE_TESTS_FAILURE_WEBHOOK_ID}/" $OUTPUT_FILE sed -i 's/\/path\/to\/the\/file.ext/.\/spec\/data\/file_types\/pdf\/blank_1.pdf/' $OUTPUT_FILE if echo "${f}" | grep -q "v2_default.txt" then - sed -i "s/MY_MODEL_ID/$FINDOC_MODEL_ID/" $OUTPUT_FILE + sed -i "s/MY_MODEL_ID/${MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID}/" $OUTPUT_FILE fi if echo "${f}" | grep -q "v2_default_webhook.txt" then - sed -i "s/MY_MODEL_ID/$FINDOC_MODEL_ID/" $OUTPUT_FILE + sed -i "s/MY_MODEL_ID/${MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID}/" $OUTPUT_FILE fi if echo "${f}" | grep -q "v2_classification.txt" then - sed -i "s/MY_MODEL_ID/$CLASSIFICATION_MODEL_ID/" $OUTPUT_FILE + sed -i "s/MY_MODEL_ID/${MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID}/" $OUTPUT_FILE fi if echo "${f}" | grep -q "v2_crop.txt" then - sed -i "s/MY_MODEL_ID/$CROP_MODEL_ID/" $OUTPUT_FILE + sed -i "s/MY_MODEL_ID/${MINDEE_V2_SE_TESTS_CROP_MODEL_ID}/" $OUTPUT_FILE fi if echo "${f}" | grep -q "v2_ocr.txt" then - sed -i "s/MY_MODEL_ID/$OCR_MODEL_ID/" $OUTPUT_FILE + sed -i 
"s/MY_MODEL_ID/${MINDEE_V2_SE_TESTS_OCR_MODEL_ID}/" $OUTPUT_FILE fi if echo "${f}" | grep -q "v2_split.txt" then - sed -i "s/MY_MODEL_ID/$SPLIT_MODEL_ID/" $OUTPUT_FILE + sed -i "s/MY_MODEL_ID/${MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID}/" $OUTPUT_FILE fi bundle exec ruby $OUTPUT_FILE From 85f97a3af4016ad4e2fa68712e702b73cea8bacd Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Thu, 26 Feb 2026 12:46:17 +0100 Subject: [PATCH 17/23] fix code samples --- lib/mindee/http/mindee_api_v2.rb | 10 ---------- sig/mindee/http/mindee_api_v2.rbs | 1 - spec/test_code_samples_v2.sh | 7 +------ 3 files changed, 1 insertion(+), 17 deletions(-) diff --git a/lib/mindee/http/mindee_api_v2.rb b/lib/mindee/http/mindee_api_v2.rb index 0393b14c..de83d118 100644 --- a/lib/mindee/http/mindee_api_v2.rb +++ b/lib/mindee/http/mindee_api_v2.rb @@ -71,16 +71,6 @@ def req_get_job(resource) private - # Retrieves a queued job. - # - # @param url [String] - # @return [Mindee::Parsing::V2::JobResponse] - def req_get_job_url(url) - @settings.check_api_key - response = poll(url) - Mindee::Parsing::V2::JobResponse.new(process_response(response)) - end - # Retrieves a queued job. 
# # @param result_class [Mindee::V2::Parsing::BaseResponse] diff --git a/sig/mindee/http/mindee_api_v2.rbs b/sig/mindee/http/mindee_api_v2.rbs index 60bb1269..f6a3b572 100644 --- a/sig/mindee/http/mindee_api_v2.rbs +++ b/sig/mindee/http/mindee_api_v2.rbs @@ -25,7 +25,6 @@ module Mindee private - def req_get_job_url: (String) -> Parsing::V2::JobResponse def req_get_result_url: [T] (_ResponseFactory[T] result_class, String url) -> T def uri?: (String) -> bool def enqueue_form_options: (Array[untyped], Input::InferenceParameters) -> Array[untyped] diff --git a/spec/test_code_samples_v2.sh b/spec/test_code_samples_v2.sh index ae407f49..16a6e30c 100755 --- a/spec/test_code_samples_v2.sh +++ b/spec/test_code_samples_v2.sh @@ -16,12 +16,7 @@ do sed -i "s/MY_WEBHOOK_ID/${MINDEE_V2_SE_TESTS_FAILURE_WEBHOOK_ID}/" $OUTPUT_FILE sed -i 's/\/path\/to\/the\/file.ext/.\/spec\/data\/file_types\/pdf\/blank_1.pdf/' $OUTPUT_FILE - if echo "${f}" | grep -q "v2_default.txt" - then - sed -i "s/MY_MODEL_ID/${MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID}/" $OUTPUT_FILE - fi - - if echo "${f}" | grep -q "v2_default_webhook.txt" + if echo "${f}" | grep -q "v2_extraction.*.txt" then sed -i "s/MY_MODEL_ID/${MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID}/" $OUTPUT_FILE fi From 12cb61a00085d1a5d846e86a75ba956c12f917d4 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Thu, 26 Feb 2026 12:53:08 +0100 Subject: [PATCH 18/23] restore whoopsie --- lib/mindee/http/mindee_api_v2.rb | 10 ++++++++++ sig/mindee/http/mindee_api_v2.rbs | 1 + 2 files changed, 11 insertions(+) diff --git a/lib/mindee/http/mindee_api_v2.rb b/lib/mindee/http/mindee_api_v2.rb index de83d118..0393b14c 100644 --- a/lib/mindee/http/mindee_api_v2.rb +++ b/lib/mindee/http/mindee_api_v2.rb @@ -71,6 +71,16 @@ def req_get_job(resource) private + # Retrieves a queued job. 
+ # + # @param url [String] + # @return [Mindee::Parsing::V2::JobResponse] + def req_get_job_url(url) + @settings.check_api_key + response = poll(url) + Mindee::Parsing::V2::JobResponse.new(process_response(response)) + end + # Retrieves a queued job. # # @param result_class [Mindee::V2::Parsing::BaseResponse] diff --git a/sig/mindee/http/mindee_api_v2.rbs b/sig/mindee/http/mindee_api_v2.rbs index f6a3b572..60bb1269 100644 --- a/sig/mindee/http/mindee_api_v2.rbs +++ b/sig/mindee/http/mindee_api_v2.rbs @@ -25,6 +25,7 @@ module Mindee private + def req_get_job_url: (String) -> Parsing::V2::JobResponse def req_get_result_url: [T] (_ResponseFactory[T] result_class, String url) -> T def uri?: (String) -> bool def enqueue_form_options: (Array[untyped], Input::InferenceParameters) -> Array[untyped] From 6ef6adc604550cb1da1e750f6aea87b6bdab10c6 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Thu, 26 Feb 2026 16:16:53 +0100 Subject: [PATCH 19/23] remove polling through URL --- lib/mindee/client_v2.rb | 6 +++--- lib/mindee/http/mindee_api_v2.rb | 8 +++----- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/lib/mindee/client_v2.rb b/lib/mindee/client_v2.rb index 1c34ccd0..a006dbc5 100644 --- a/lib/mindee/client_v2.rb +++ b/lib/mindee/client_v2.rb @@ -35,10 +35,10 @@ def get_result(response_class, resource) end # Retrieves an inference from a given queue or URL to the job. - # @param resource [String] ID of the job or URL to the job. + # @param job_id [String] ID of the job. # @return [Mindee::Parsing::V2::JobResponse] - def get_job(resource) - @mindee_api.req_get_job(resource) + def get_job(job_id) + @mindee_api.req_get_job(job_id) end # Enqueue a document for async parsing. 
diff --git a/lib/mindee/http/mindee_api_v2.rb b/lib/mindee/http/mindee_api_v2.rb index 0393b14c..81f9fef2 100644 --- a/lib/mindee/http/mindee_api_v2.rb +++ b/lib/mindee/http/mindee_api_v2.rb @@ -57,14 +57,12 @@ def req_get_result(response_class, resource) # Retrieves a queued job. # - # @param resource [String] ID of the job or URL to the job. + # @param job_id [String] ID of the job or URL to the job. # @return [Mindee::Parsing::V2::JobResponse] - def req_get_job(resource) - return req_get_job_url(resource) if uri?(resource) - + def req_get_job(job_id) @settings.check_api_key response = inference_job_req_get( - resource + job_id ) Mindee::Parsing::V2::JobResponse.new(process_response(response)) end From fd950f098e42225e2175f3e0ef91cd3331c7825a Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Fri, 27 Feb 2026 12:22:53 +0100 Subject: [PATCH 20/23] update system to only include one class --- .github/workflows/_test-integrations.yml | 2 +- .github/workflows/_test-units.yml | 1 + docs/code_samples/v2_classification.txt | 10 +++---- docs/code_samples/v2_crop.txt | 10 +++---- docs/code_samples/v2_extraction_webhook.txt | 2 +- docs/code_samples/v2_ocr.txt | 10 +++---- docs/code_samples/v2_split.txt | 10 +++---- lib/mindee/client_v2.rb | 16 +++++------ lib/mindee/http/mindee_api_v2.rb | 20 +++++++------- lib/mindee/input/base_parameters.rb | 8 +++--- lib/mindee/input/inference_parameters.rb | 2 +- lib/mindee/parsing/v2/inference.rb | 4 +++ lib/mindee/parsing/v2/inference_response.rb | 2 +- lib/mindee/v2/parsing/base_inference.rb | 6 ++++- lib/mindee/v2/parsing/base_response.rb | 9 ------- lib/mindee/v2/product.rb | 12 +++------ lib/mindee/v2/product/base_product.rb | 27 +++++++++++++++++++ .../product/classification/classification.rb | 19 +++++++++++++ .../classification/classification_response.rb | 3 --- .../params/classification_parameters.rb | 2 +- lib/mindee/v2/product/crop/crop.rb | 19 +++++++++++++ 
lib/mindee/v2/product/crop/crop_response.rb | 3 --- .../v2/product/crop/params/crop_parameters.rb | 2 +- lib/mindee/v2/product/ocr/ocr.rb | 19 +++++++++++++ lib/mindee/v2/product/ocr/ocr_response.rb | 3 --- .../v2/product/ocr/params/ocr_parameters.rb | 2 +- .../product/split/params/split_parameters.rb | 2 +- lib/mindee/v2/product/split/split.rb | 19 +++++++++++++ lib/mindee/v2/product/split/split_response.rb | 3 --- sig/mindee/client_v2.rbs | 25 +++++++---------- sig/mindee/http/mindee_api_v2.rbs | 15 ++++++----- sig/mindee/input/base_parameters.rbs | 4 +-- sig/mindee/input/inference_parameters.rbs | 2 +- sig/mindee/parsing/v2/inference.rbs | 4 +++ sig/mindee/parsing/v2/inference_response.rbs | 4 +-- sig/mindee/v2/parsing/base_response.rbs | 9 ------- sig/mindee/v2/product/base_product.rbs | 19 +++++++++++++ .../product/classification/classification.rbs | 10 +++++++ .../classification_classifier.rbs | 0 .../classification_inference.rbs | 0 .../classification_response.rbs | 4 +-- .../classification/classification_result.rbs | 0 .../classification_parameters.rbs | 2 +- sig/mindee/v2/product/crop/crop.rbs | 10 +++++++ sig/mindee/v2/product/crop/crop_response.rbs | 4 +-- .../crop_parameters/crop_parameters.rbs | 2 +- sig/mindee/v2/product/ocr/ocr.rbs | 10 +++++++ sig/mindee/v2/product/ocr/ocr_response.rbs | 4 +-- .../params/ocr_parameters/ocr_parameters.rbs | 2 +- .../split_parameters/split_parameters.rbs | 2 +- sig/mindee/v2/product/split/split.rbs | 10 +++++++ .../v2/product/split/split_response.rbs | 4 +-- .../classification_integration.rb | 4 +-- spec/v2/product/crop/crop_integration.rb | 4 +-- spec/v2/product/ocr/ocr_integration.rb | 4 +-- spec/v2/product/split/split_integration.rb | 4 +-- spec/v2/product/split/split_spec.rb | 3 ++- 57 files changed, 271 insertions(+), 141 deletions(-) create mode 100644 lib/mindee/v2/product/base_product.rb create mode 100644 lib/mindee/v2/product/classification/classification.rb create mode 100644 
lib/mindee/v2/product/crop/crop.rb create mode 100644 lib/mindee/v2/product/ocr/ocr.rb create mode 100644 lib/mindee/v2/product/split/split.rb create mode 100644 sig/mindee/v2/product/base_product.rbs create mode 100644 sig/mindee/v2/product/classification/classification.rbs rename sig/mindee/{ => v2}/product/classification/classification_classifier.rbs (100%) rename sig/mindee/{ => v2}/product/classification/classification_inference.rbs (100%) rename sig/mindee/{ => v2}/product/classification/classification_result.rbs (100%) create mode 100644 sig/mindee/v2/product/crop/crop.rbs create mode 100644 sig/mindee/v2/product/ocr/ocr.rbs create mode 100644 sig/mindee/v2/product/split/split.rbs diff --git a/.github/workflows/_test-integrations.yml b/.github/workflows/_test-integrations.yml index c62ac12d..75a72a71 100644 --- a/.github/workflows/_test-integrations.yml +++ b/.github/workflows/_test-integrations.yml @@ -20,7 +20,7 @@ jobs: - "macos-latest" ruby: - "3.0" - - "3.4" + - "4.0" steps: - uses: actions/checkout@v4 with: diff --git a/.github/workflows/_test-units.yml b/.github/workflows/_test-units.yml index a78ce8de..1b3859e7 100644 --- a/.github/workflows/_test-units.yml +++ b/.github/workflows/_test-units.yml @@ -23,6 +23,7 @@ jobs: - "3.2" - "3.3" - "3.4" + - "4.0" steps: - uses: actions/checkout@v4 with: diff --git a/docs/code_samples/v2_classification.txt b/docs/code_samples/v2_classification.txt index 518d3877..12bcd4dc 100644 --- a/docs/code_samples/v2_classification.txt +++ b/docs/code_samples/v2_classification.txt @@ -3,8 +3,6 @@ require 'mindee' require 'mindee/v2/product' -include Mindee::V2::Product::Classification - input_path = '/path/to/the/file.ext' api_key = 'MY_API_KEY' model_id = 'MY_MODEL_ID' @@ -13,17 +11,17 @@ model_id = 'MY_MODEL_ID' mindee_client = Mindee::ClientV2.new(api_key: api_key) # Set inference parameters -classification_params = Params::ClassificationParameters.new( +classification_params = { # ID of the model, required. 
- model_id, -) + model_id: model_id, +} # Load a file from disk input_source = Mindee::Input::Source::PathInputSource.new(input_path) # Send for processing response = mindee_client.enqueue_and_get_result( - ClassificationResponse, + Mindee::V2::Product::Classification::Classification, input_source, classification_params ) diff --git a/docs/code_samples/v2_crop.txt b/docs/code_samples/v2_crop.txt index b73c7685..bd4e6e57 100644 --- a/docs/code_samples/v2_crop.txt +++ b/docs/code_samples/v2_crop.txt @@ -3,8 +3,6 @@ require 'mindee' require 'mindee/v2/product' -include Mindee::V2::Product::Crop - input_path = '/path/to/the/file.ext' api_key = 'MY_API_KEY' model_id = 'MY_MODEL_ID' @@ -13,17 +11,17 @@ model_id = 'MY_MODEL_ID' mindee_client = Mindee::ClientV2.new(api_key: api_key) # Set inference parameters -crop_params = Params::CropParameters.new( +crop_params = { # ID of the model, required. - model_id, -) + model_id: model_id, +} # Load a file from disk input_source = Mindee::Input::Source::PathInputSource.new(input_path) # Send for processing response = mindee_client.enqueue_and_get_result( - CropResponse, + Mindee::V2::Product::Crop::Crop, input_source, crop_params ) diff --git a/docs/code_samples/v2_extraction_webhook.txt b/docs/code_samples/v2_extraction_webhook.txt index c09ad207..ae641d23 100644 --- a/docs/code_samples/v2_extraction_webhook.txt +++ b/docs/code_samples/v2_extraction_webhook.txt @@ -14,7 +14,7 @@ inference_params = Mindee::Input::InferenceParameters.new( # ID of the model, required. model_id, # Add any number of webhook IDs here. - webhook_ids: ["MY_WEBHOOK_ID"], + webhook_ids: ['MY_WEBHOOK_ID'], # Options: set to `true` or `false` to override defaults # Enhance extraction accuracy with Retrieval-Augmented Generation. 
diff --git a/docs/code_samples/v2_ocr.txt b/docs/code_samples/v2_ocr.txt index 0bac2f6a..3a7d9250 100644 --- a/docs/code_samples/v2_ocr.txt +++ b/docs/code_samples/v2_ocr.txt @@ -3,8 +3,6 @@ require 'mindee' require 'mindee/v2/product' -include Mindee::V2::Product::Ocr - input_path = '/path/to/the/file.ext' api_key = 'MY_API_KEY' model_id = 'MY_MODEL_ID' @@ -13,17 +11,17 @@ model_id = 'MY_MODEL_ID' mindee_client = Mindee::ClientV2.new(api_key: api_key) # Set inference parameters -ocr_params = Params::OcrParameters.new( +ocr_params = { # ID of the model, required. - model_id, -) + model_id: model_id, +} # Load a file from disk input_source = Mindee::Input::Source::PathInputSource.new(input_path) # Send for processing response = mindee_client.enqueue_and_get_result( - OcrResponse, + Mindee::V2::Product::Ocr::Ocr, input_source, ocr_params ) diff --git a/docs/code_samples/v2_split.txt b/docs/code_samples/v2_split.txt index 3da0cc1f..e599d927 100644 --- a/docs/code_samples/v2_split.txt +++ b/docs/code_samples/v2_split.txt @@ -3,8 +3,6 @@ require 'mindee' require 'mindee/v2/product' -include Mindee::V2::Product::Split - input_path = '/path/to/the/file.ext' api_key = 'MY_API_KEY' model_id = 'MY_MODEL_ID' @@ -13,17 +11,17 @@ model_id = 'MY_MODEL_ID' mindee_client = Mindee::ClientV2.new(api_key: api_key) # Set inference parameters -split_params = Params::SplitParameters.new( +split_params = { # ID of the model, required. - model_id, -) + model_id: model_id, +} # Load a file from disk input_source = Mindee::Input::Source::PathInputSource.new(input_path) # Send for processing response = mindee_client.enqueue_and_get_result( - SplitResponse, + Mindee::V2::Product::Split::Split, input_source, split_params ) diff --git a/lib/mindee/client_v2.rb b/lib/mindee/client_v2.rb index a006dbc5..7cd7090c 100644 --- a/lib/mindee/client_v2.rb +++ b/lib/mindee/client_v2.rb @@ -27,11 +27,11 @@ def get_inference(inference_id) end # Retrieves a result from a given queue or URL to the result. 
+ # @param product_type [Class] The return class. # @param resource [String] ID of the inference or URL to the result. - # @param response_class [Class] # @return [Mindee::Parsing::V2::BaseResponse] - def get_result(response_class, resource) - @mindee_api.req_get_result(response_class, resource) + def get_result(product_type, resource) + @mindee_api.req_get_result(product_type, resource) end # Retrieves an inference from a given queue or URL to the job. @@ -67,17 +67,17 @@ def enqueue(input_source, params) # Enqueues to an asynchronous endpoint and automatically polls for a response. # - # @param response_type [Mindee::V2::BaseResponse] The return class. + # @param product_type [Class] The return class. # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource] # The source of the input document (local file or URL). # @param params [Hash, InferenceParameters] Parameters for the inference. # @return [Mindee::Parsing::Common::ApiResponse] def enqueue_and_get_result( - response_type, + product_type, input_source, params ) - normalized_params = normalize_parameters(response_type._params_type, params) + normalized_params = normalize_parameters(product_type.params_type, params) normalized_params.validate_async_params enqueue_response = enqueue(input_source, normalized_params) @@ -97,7 +97,7 @@ def enqueue_and_get_result( if poll_results.job.status == 'Failed' break elsif !poll_results.job.result_url.nil? - return get_result(response_type, poll_results.job.result_url) + return get_result(product_type, poll_results.job.result_url) end logger.debug( @@ -131,7 +131,7 @@ def enqueue_and_get_result( def enqueue_and_get_inference(input_source, params) warn '[DEPRECATION] `enqueue_and_get_inference` is deprecated; use `enqueue_and_get_result` instead.' 
- response = enqueue_and_get_result(Mindee::Parsing::V2::InferenceResponse, input_source, params) + response = enqueue_and_get_result(Mindee::Parsing::V2::Inference, input_source, params) unless response.is_a?(Mindee::Parsing::V2::InferenceResponse) raise TypeError, "Invalid response type \"#{response.class}\"" end diff --git a/lib/mindee/http/mindee_api_v2.rb b/lib/mindee/http/mindee_api_v2.rb index 81f9fef2..89488bdf 100644 --- a/lib/mindee/http/mindee_api_v2.rb +++ b/lib/mindee/http/mindee_api_v2.rb @@ -37,22 +37,22 @@ def req_post_enqueue(input_source, params) # @param inference_id [String] # @return [Mindee::Parsing::V2::InferenceResponse] def req_get_inference(inference_id) - req_get_result(Mindee::Parsing::V2::InferenceResponse, inference_id) + req_get_result(Parsing::V2::Inference, inference_id) end # Retrieves a result from a given queue. - # @param response_class [Class] + # @param product_type [Class] The return class. # @param resource [String] ID of the inference or URL to the result. # @return [Mindee::Parsing::V2::BaseResponse] - def req_get_result(response_class, resource) - return req_get_result_url(response_class, resource) if uri?(resource) + def req_get_result(product_type, resource) + return req_get_result_url(product_type.response_type, resource) if uri?(resource) @settings.check_api_key response = result_req_get( resource, - response_class + product_type ) - response_class.new(process_response(response)) + product_type.response_type.new(process_response(response)) end # Retrieves a queued job. @@ -156,10 +156,10 @@ def inference_result_req_get(queue_id) # Polls the API for the result of an inference. # # @param queue_id [String] ID of the queue. - # @param response_class [Class] + # @param product_type [Class] The return class. 
# @return [Net::HTTPResponse] - def result_req_get(queue_id, response_class) - poll("#{@settings.base_url}/products/#{response_class._slug}/results/#{queue_id}") + def result_req_get(queue_id, product_type) + poll("#{@settings.base_url}/products/#{product_type.slug}/results/#{queue_id}") end # Handle parameters for the enqueue form @@ -184,7 +184,7 @@ def enqueue_form_options(form_data, params) # @param params [Input::BaseParameters] Inference options. # @return [Net::HTTPResponse, nil] def enqueue(input_source, params) - uri = URI("#{@settings.base_url}/products/#{params._slug}/enqueue") + uri = URI("#{@settings.base_url}/products/#{params.slug}/enqueue") form_data = if input_source.is_a?(Mindee::Input::Source::URLInputSource) [['url', input_source.url]] # : Array[untyped] diff --git a/lib/mindee/input/base_parameters.rb b/lib/mindee/input/base_parameters.rb index d93c0723..680353e5 100644 --- a/lib/mindee/input/base_parameters.rb +++ b/lib/mindee/input/base_parameters.rb @@ -41,17 +41,17 @@ def initialize( end # @return [String] Slug for the endpoint. - def self._slug + def self.slug if self == BaseParameters - raise NotImplementedError, 'Cannot access `_slug` directly on the BaseParameters class.' + raise NotImplementedError, 'Cannot access `slug` directly on the BaseParameters class.' end '' end # @return [String] Slug for the endpoint. - def _slug - self.class._slug + def slug + self.class.slug end def self.from_hash(params: {}) diff --git a/lib/mindee/input/inference_parameters.rb b/lib/mindee/input/inference_parameters.rb index f2390831..a377d052 100644 --- a/lib/mindee/input/inference_parameters.rb +++ b/lib/mindee/input/inference_parameters.rb @@ -30,7 +30,7 @@ class InferenceParameters < Mindee::Input::BaseParameters attr_reader :data_schema # @return [String] Slug for the endpoint. 
- def self._slug + def self.slug 'extraction' end diff --git a/lib/mindee/parsing/v2/inference.rb b/lib/mindee/parsing/v2/inference.rb index 2c00e590..32801564 100644 --- a/lib/mindee/parsing/v2/inference.rb +++ b/lib/mindee/parsing/v2/inference.rb @@ -17,6 +17,10 @@ class Inference < Mindee::V2::Parsing::BaseInference # @return [InferenceResult] Result contents. attr_reader :result + @params_type = Input::InferenceParameters + @slug = 'extraction' + @response_type = InferenceResponse + # @param server_response [Hash] Hash representation of the JSON returned by the service. def initialize(server_response) super diff --git a/lib/mindee/parsing/v2/inference_response.rb b/lib/mindee/parsing/v2/inference_response.rb index fd1d2750..7911c33b 100644 --- a/lib/mindee/parsing/v2/inference_response.rb +++ b/lib/mindee/parsing/v2/inference_response.rb @@ -12,7 +12,7 @@ class InferenceResponse < Mindee::V2::Parsing::BaseResponse # @return [Inference] Parsed inference payload. attr_reader :inference - @_slug = 'extraction' + @slug = 'extraction' @_params_type = Input::InferenceParameters # @param server_response [Hash] Hash parsed from the API JSON response. diff --git a/lib/mindee/v2/parsing/base_inference.rb b/lib/mindee/v2/parsing/base_inference.rb index da2a867e..31cbe87c 100644 --- a/lib/mindee/v2/parsing/base_inference.rb +++ b/lib/mindee/v2/parsing/base_inference.rb @@ -1,10 +1,13 @@ # frozen_string_literal: true +require_relative '../product/base_product' +require_relative '../../parsing/v2/inference_response' + module Mindee module V2 module Parsing # Base class for V2 inference responses. - class BaseInference + class BaseInference < Mindee::V2::Product::BaseProduct # @return [InferenceJob] Metadata about the job. attr_reader :job # @return [Parsing::V2::InferenceModel] Model info for the inference. 
@@ -17,6 +20,7 @@ class BaseInference def initialize(http_response) raise ArgumentError, 'Server response must be a Hash' unless http_response.is_a?(Hash) + super() @model = Mindee::Parsing::V2::InferenceModel.new(http_response['model']) @file = Mindee::Parsing::V2::InferenceFile.new(http_response['file']) @id = http_response['id'] diff --git a/lib/mindee/v2/parsing/base_response.rb b/lib/mindee/v2/parsing/base_response.rb index 68e0a68c..a604e156 100644 --- a/lib/mindee/v2/parsing/base_response.rb +++ b/lib/mindee/v2/parsing/base_response.rb @@ -7,15 +7,6 @@ module Parsing class BaseResponse < Mindee::Parsing::V2::CommonResponse # @return [BaseInference] The inference result for a split utility request attr_reader :inference - - # @return [String] The slug of the endpoint used for this response - @_slug = '' - # @return [Class] The class of the parameters used for this response - @_params_type = Mindee::Input::BaseParameters - - class << self - attr_reader :_params_type, :_slug - end end end end diff --git a/lib/mindee/v2/product.rb b/lib/mindee/v2/product.rb index c34a2224..561b8af5 100644 --- a/lib/mindee/v2/product.rb +++ b/lib/mindee/v2/product.rb @@ -1,10 +1,6 @@ # frozen_string_literal: true -require_relative 'product/classification/classification_response' -require_relative 'product/classification/params/classification_parameters' -require_relative 'product/crop/crop_response' -require_relative 'product/crop/params/crop_parameters' -require_relative 'product/ocr/ocr_response' -require_relative 'product/ocr/params/ocr_parameters' -require_relative 'product/split/split_response' -require_relative 'product/split/params/split_parameters' +require_relative 'product/classification/classification' +require_relative 'product/crop/crop' +require_relative 'product/ocr/ocr' +require_relative 'product/split/split' diff --git a/lib/mindee/v2/product/base_product.rb b/lib/mindee/v2/product/base_product.rb new file mode 100644 index 00000000..a11e0eb1 --- /dev/null +++ 
b/lib/mindee/v2/product/base_product.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +require_relative '../parsing/base_response' + +module Mindee + module V2 + module Product + # Base class for all V2 products. + class BaseProduct + # @return [String] The slug of the endpoint used for this response + @slug = '' + # @return [Class] The class of the parameters used for this response + @params_type = Mindee::Input::BaseParameters + # @return [Class] The class of the response used for this product + @response_type = Mindee::V2::Parsing::BaseResponse + + def initialize + raise StandardError, 'Cannot instantiate abstract class.' if instance_of?(BaseProduct) + end + + class << self + attr_reader :params_type, :slug, :response_type + end + end + end + end +end diff --git a/lib/mindee/v2/product/classification/classification.rb b/lib/mindee/v2/product/classification/classification.rb new file mode 100644 index 00000000..0eff57a8 --- /dev/null +++ b/lib/mindee/v2/product/classification/classification.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +require_relative 'classification_response' +require_relative 'params/classification_parameters' + +module Mindee + module V2 + module Product + module Classification + # Classification product. + class Classification < BaseProduct + @slug = 'classification' + @params_type = Mindee::V2::Product::Classification::Params::ClassificationParameters + @response_type = Mindee::V2::Product::Classification::ClassificationResponse + end + end + end + end +end diff --git a/lib/mindee/v2/product/classification/classification_response.rb b/lib/mindee/v2/product/classification/classification_response.rb index 24b8b4a1..0b2f7d3e 100644 --- a/lib/mindee/v2/product/classification/classification_response.rb +++ b/lib/mindee/v2/product/classification/classification_response.rb @@ -13,9 +13,6 @@ class ClassificationResponse < Mindee::V2::Parsing::BaseResponse # @return [ClassificationInference] Parsed inference payload. 
attr_reader :inference - @_slug = 'classification' - @_params_type = Classification::Params::ClassificationParameters - # @param server_response [Hash] Hash parsed from the API JSON response. def initialize(server_response) super diff --git a/lib/mindee/v2/product/classification/params/classification_parameters.rb b/lib/mindee/v2/product/classification/params/classification_parameters.rb index d4ff6ae6..398b218e 100644 --- a/lib/mindee/v2/product/classification/params/classification_parameters.rb +++ b/lib/mindee/v2/product/classification/params/classification_parameters.rb @@ -8,7 +8,7 @@ module Params # Parameters accepted by the classification utility v2 endpoint. class ClassificationParameters < Mindee::Input::BaseParameters # @return [String] Slug for the endpoint. - def self._slug + def self.slug 'classification' end diff --git a/lib/mindee/v2/product/crop/crop.rb b/lib/mindee/v2/product/crop/crop.rb new file mode 100644 index 00000000..24211cfb --- /dev/null +++ b/lib/mindee/v2/product/crop/crop.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +require_relative 'crop_response' +require_relative 'params/crop_parameters' + +module Mindee + module V2 + module Product + module Crop + # Crop product. + class Crop < BaseProduct + @slug = 'crop' + @params_type = Mindee::V2::Product::Crop::Params::CropParameters + @response_type = Mindee::V2::Product::Crop::CropResponse + end + end + end + end +end diff --git a/lib/mindee/v2/product/crop/crop_response.rb b/lib/mindee/v2/product/crop/crop_response.rb index 9a6c03e6..90ada724 100644 --- a/lib/mindee/v2/product/crop/crop_response.rb +++ b/lib/mindee/v2/product/crop/crop_response.rb @@ -13,9 +13,6 @@ class CropResponse < Mindee::V2::Parsing::BaseResponse # @return [CropInference] Parsed inference payload. attr_reader :inference - @_slug = 'crop' - @_params_type = Crop::Params::CropParameters - # @param server_response [Hash] Hash parsed from the API JSON response. 
def initialize(server_response) super diff --git a/lib/mindee/v2/product/crop/params/crop_parameters.rb b/lib/mindee/v2/product/crop/params/crop_parameters.rb index 49fafc3a..712fe7ce 100644 --- a/lib/mindee/v2/product/crop/params/crop_parameters.rb +++ b/lib/mindee/v2/product/crop/params/crop_parameters.rb @@ -8,7 +8,7 @@ module Params # Parameters accepted by the crop utility v2 endpoint. class CropParameters < Mindee::Input::BaseParameters # @return [String] Slug for the endpoint. - def self._slug + def self.slug 'crop' end diff --git a/lib/mindee/v2/product/ocr/ocr.rb b/lib/mindee/v2/product/ocr/ocr.rb new file mode 100644 index 00000000..50ff9e80 --- /dev/null +++ b/lib/mindee/v2/product/ocr/ocr.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +require_relative 'ocr_response' +require_relative 'params/ocr_parameters' + +module Mindee + module V2 + module Product + module Ocr + # Ocr product. + class Ocr < BaseProduct + @slug = 'ocr' + @params_type = Mindee::V2::Product::Ocr::Params::OcrParameters + @response_type = Mindee::V2::Product::Ocr::OcrResponse + end + end + end + end +end diff --git a/lib/mindee/v2/product/ocr/ocr_response.rb b/lib/mindee/v2/product/ocr/ocr_response.rb index 331d3b74..3bac4d1e 100644 --- a/lib/mindee/v2/product/ocr/ocr_response.rb +++ b/lib/mindee/v2/product/ocr/ocr_response.rb @@ -13,9 +13,6 @@ class OcrResponse < Mindee::V2::Parsing::BaseResponse # @return [OcrInference] Parsed inference payload. attr_reader :inference - @_slug = 'ocr' - @_params_type = Ocr::Params::OcrParameters - # @param server_response [Hash] Hash parsed from the API JSON response. 
def initialize(server_response) super diff --git a/lib/mindee/v2/product/ocr/params/ocr_parameters.rb b/lib/mindee/v2/product/ocr/params/ocr_parameters.rb index e2d56eb6..ca068c83 100644 --- a/lib/mindee/v2/product/ocr/params/ocr_parameters.rb +++ b/lib/mindee/v2/product/ocr/params/ocr_parameters.rb @@ -8,7 +8,7 @@ module Params # Parameters accepted by the ocr utility v2 endpoint. class OcrParameters < Mindee::Input::BaseParameters # @return [String] Slug for the endpoint. - def self._slug + def self.slug 'ocr' end diff --git a/lib/mindee/v2/product/split/params/split_parameters.rb b/lib/mindee/v2/product/split/params/split_parameters.rb index c290e3ba..57dd79b8 100644 --- a/lib/mindee/v2/product/split/params/split_parameters.rb +++ b/lib/mindee/v2/product/split/params/split_parameters.rb @@ -8,7 +8,7 @@ module Params # Parameters accepted by the split utility v2 endpoint. class SplitParameters < Mindee::Input::BaseParameters # @return [String] Slug for the endpoint. - def self._slug + def self.slug 'split' end diff --git a/lib/mindee/v2/product/split/split.rb b/lib/mindee/v2/product/split/split.rb new file mode 100644 index 00000000..40318cea --- /dev/null +++ b/lib/mindee/v2/product/split/split.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +require_relative 'split_response' +require_relative 'params/split_parameters' + +module Mindee + module V2 + module Product + module Split + # Split product. 
+ class Split < BaseProduct + @slug = 'split' + @params_type = Mindee::V2::Product::Split::Params::SplitParameters + @response_type = Mindee::V2::Product::Split::SplitResponse + end + end + end + end +end diff --git a/lib/mindee/v2/product/split/split_response.rb b/lib/mindee/v2/product/split/split_response.rb index 54190d5d..9fdafae1 100644 --- a/lib/mindee/v2/product/split/split_response.rb +++ b/lib/mindee/v2/product/split/split_response.rb @@ -13,9 +13,6 @@ class SplitResponse < Mindee::V2::Parsing::BaseResponse # @return [SplitInference] Parsed inference payload. attr_reader :inference - @_slug = 'split' - @_params_type = Split::Params::SplitParameters - # @param server_response [Hash] Hash parsed from the API JSON response. def initialize(server_response) super diff --git a/sig/mindee/client_v2.rbs b/sig/mindee/client_v2.rbs index b7bff121..ff4c20ef 100644 --- a/sig/mindee/client_v2.rbs +++ b/sig/mindee/client_v2.rbs @@ -2,27 +2,20 @@ OTS_OWNER: String module Mindee - interface _ParametersFactory[T] - def from_hash: (params: untyped) -> T - end class ClientV2 - attr_reader mindee_api: HTTP::MindeeApiV2 + private attr_reader mindee_api: HTTP::MindeeApiV2 def logger: () -> Logger def initialize: (?api_key: String) -> void - def get_inference: (String) -> Mindee::Parsing::V2::InferenceResponse - def get_result: [T] (HTTP::_ResponseFactory[T] response_class, String inference_id) -> T + def get_inference: (String inference_id) -> Parsing::V2::InferenceResponse + def get_result: [T] (HTTP::_ProductClass[T] product_type, String resource) -> T def get_result_url: [T] (HTTP::_ResponseFactory[T] response_class, String inference_id) -> T - def get_job: (String) -> Mindee::Parsing::V2::JobResponse - def enqueue_inference: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | Input::InferenceParameters, disable_redundant_warnings: bool) -> Mindee::Parsing::V2::JobResponse - def enqueue: (Input::Source::LocalInputSource | 
Input::Source::URLInputSource, Input::BaseParameters) -> Mindee::Parsing::V2::JobResponse - def enqueue_and_get_result: [T < Input::BaseParameters] ( - singleton(Mindee::V2::Parsing::BaseResponse), - Input::Source::LocalInputSource | Input::Source::URLInputSource, - Hash[String | Symbol, untyped] | T - ) -> Mindee::V2::Parsing::BaseResponse[untyped] - def enqueue_and_get_inference: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | Input::InferenceParameters) -> Mindee::Parsing::V2::InferenceResponse + def get_job: (String job_id) -> Parsing::V2::JobResponse + def enqueue_inference: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | Input::InferenceParameters params, ?disable_redundant_warnings: bool) -> Parsing::V2::JobResponse + def enqueue: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Input::BaseParameters params) -> Parsing::V2::JobResponse + def enqueue_and_get_result: [T] (HTTP::_ProductClass[T] product_type, Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | Input::InferenceParameters params) -> T + def enqueue_and_get_inference: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | Input::InferenceParameters params) -> Parsing::V2::InferenceResponse def validate_async_params: (Integer | Float, Integer | Float, Integer) -> void - def normalize_parameters: [T < Input::BaseParameters] (_ParametersFactory[T], Hash[String | Symbol, untyped] | T) -> T + def normalize_parameters: (singleton(Input::BaseParameters) param_class, Hash[String | Symbol, untyped] | Input::BaseParameters params) -> Input::BaseParameters end end diff --git a/sig/mindee/http/mindee_api_v2.rbs b/sig/mindee/http/mindee_api_v2.rbs index 60bb1269..62872370 100644 --- a/sig/mindee/http/mindee_api_v2.rbs +++ b/sig/mindee/http/mindee_api_v2.rbs @@ -1,9 +1,14 @@ -# 
lib/mindee/http/mindee_api_v2.rb +# lib/mindee/http/mindee_api_v2.rbs module Mindee module HTTP interface _ResponseFactory[T] def new: (Hash[String | Symbol, untyped]) -> T - def _slug: () -> String + end + + interface _ProductClass[T] + def slug: () -> String + def response_type: () -> _ResponseFactory[T] + def params_type: () -> singleton(Input::BaseParameters) end class MindeeApiV2 @@ -11,7 +16,7 @@ module Mindee def initialize: (?api_key: String?) -> void - def req_get_result: [T] (_ResponseFactory[T] response_class, String inference_id) -> T + def req_get_result: [T] (_ProductClass[T] product_type, String resource) -> T def req_post_enqueue: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Input::BaseParameters) -> Parsing::V2::JobResponse def req_get_inference: (String) -> Parsing::V2::InferenceResponse @@ -20,16 +25,14 @@ module Mindee def poll: (String) -> Net::HTTPResponse def inference_job_req_get: (String) -> Net::HTTPResponse def inference_result_req_get: (String) -> Net::HTTPResponse - def result_req_get: [T] (String, _ResponseFactory[T] result_class) -> Net::HTTPResponse + def result_req_get: [T] (String, _ProductClass[T] product_type) -> Net::HTTPResponse def enqueue: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Input::BaseParameters) -> Net::HTTPResponse? 
- private def req_get_job_url: (String) -> Parsing::V2::JobResponse def req_get_result_url: [T] (_ResponseFactory[T] result_class, String url) -> T def uri?: (String) -> bool def enqueue_form_options: (Array[untyped], Input::InferenceParameters) -> Array[untyped] - end end end diff --git a/sig/mindee/input/base_parameters.rbs b/sig/mindee/input/base_parameters.rbs index 428d9c1f..abccc144 100644 --- a/sig/mindee/input/base_parameters.rbs +++ b/sig/mindee/input/base_parameters.rbs @@ -3,12 +3,12 @@ module Mindee module Input class BaseParameters - attr_reader self._slug: String + attr_reader self.slug: String def self.from_hash: (params: Hash[String | Symbol, untyped]) -> instance def self.load_from_hash: (params: Hash[String | Symbol, untyped]) -> Hash[String | Symbol, untyped] - def _slug: -> String + def slug: -> String attr_reader model_id: String attr_reader file_alias: String? diff --git a/sig/mindee/input/inference_parameters.rbs b/sig/mindee/input/inference_parameters.rbs index de75acee..b88d8ef2 100644 --- a/sig/mindee/input/inference_parameters.rbs +++ b/sig/mindee/input/inference_parameters.rbs @@ -2,7 +2,7 @@ module Mindee module Input class InferenceParameters < BaseParameters - def self._slug: -> String + def self.slug: -> String attr_reader confidence: bool? attr_reader polygon: bool? 
diff --git a/sig/mindee/parsing/v2/inference.rbs b/sig/mindee/parsing/v2/inference.rbs index e95fc833..b6584c95 100644 --- a/sig/mindee/parsing/v2/inference.rbs +++ b/sig/mindee/parsing/v2/inference.rbs @@ -3,6 +3,10 @@ module Mindee module Parsing module V2 class Inference < Mindee::V2::Parsing::BaseInference + attr_reader self.params_type: singleton(Input::InferenceParameters) + attr_reader self.response_type: singleton(InferenceResponse) + attr_reader self.slug: String + attr_reader active_options: InferenceActiveOptions attr_reader result: InferenceResult diff --git a/sig/mindee/parsing/v2/inference_response.rbs b/sig/mindee/parsing/v2/inference_response.rbs index ae90cf40..05138748 100644 --- a/sig/mindee/parsing/v2/inference_response.rbs +++ b/sig/mindee/parsing/v2/inference_response.rbs @@ -4,7 +4,7 @@ module Mindee module V2 class InferenceResponse < Mindee::V2::Parsing::BaseResponse[Mindee::Parsing::V2::Inference] - self.@_slug: String + self.@slug: String self.@_params_type: singleton(Input::BaseParameters) attr_reader inference: Mindee::Parsing::V2::Inference @@ -14,7 +14,7 @@ module Mindee def to_s: -> String def self._params_type: () -> singleton(Input::InferenceParameters) - def self._slug: () -> String + def self.slug: () -> String end end end diff --git a/sig/mindee/v2/parsing/base_response.rbs b/sig/mindee/v2/parsing/base_response.rbs index 5cdcb0e1..8d1c7814 100644 --- a/sig/mindee/v2/parsing/base_response.rbs +++ b/sig/mindee/v2/parsing/base_response.rbs @@ -3,16 +3,7 @@ module Mindee module V2 module Parsing - interface _ParametersFactory[T] - def new: (Hash[String | Symbol, untyped]) -> T - def from_hash: (params: untyped) -> T - end class BaseResponse[T] < Mindee::Parsing::V2::CommonResponse - self.@_slug: String - self.@_params_type: singleton(Input::BaseParameters) - - def self._params_type: () -> singleton(Input::BaseParameters) - def self._slug: () -> String attr_reader inference: T end end diff --git 
a/sig/mindee/v2/product/base_product.rbs b/sig/mindee/v2/product/base_product.rbs new file mode 100644 index 00000000..aa6bdbe3 --- /dev/null +++ b/sig/mindee/v2/product/base_product.rbs @@ -0,0 +1,19 @@ +module Mindee + module V2 + module Product + class BaseProduct + self.@slug: String + self.@params_type: singleton(Input::BaseParameters) + self.@response_type: singleton(Parsing::BaseResponse) + + def self._params_type: () -> singleton(Input::BaseParameters) + + attr_reader self.slug: String + attr_reader self.params_type: singleton(Input::BaseParameters) + attr_reader self.response_type: singleton(Parsing::BaseResponse) + + def initialize: -> void + end + end + end +end diff --git a/sig/mindee/v2/product/classification/classification.rbs b/sig/mindee/v2/product/classification/classification.rbs new file mode 100644 index 00000000..f8097582 --- /dev/null +++ b/sig/mindee/v2/product/classification/classification.rbs @@ -0,0 +1,10 @@ +module Mindee + module V2 + module Product + module Classification + class Classification < BaseProduct + end + end + end + end +end diff --git a/sig/mindee/product/classification/classification_classifier.rbs b/sig/mindee/v2/product/classification/classification_classifier.rbs similarity index 100% rename from sig/mindee/product/classification/classification_classifier.rbs rename to sig/mindee/v2/product/classification/classification_classifier.rbs diff --git a/sig/mindee/product/classification/classification_inference.rbs b/sig/mindee/v2/product/classification/classification_inference.rbs similarity index 100% rename from sig/mindee/product/classification/classification_inference.rbs rename to sig/mindee/v2/product/classification/classification_inference.rbs diff --git a/sig/mindee/v2/product/classification/classification_response.rbs b/sig/mindee/v2/product/classification/classification_response.rbs index f2b7b755..ee34fbc1 100644 --- a/sig/mindee/v2/product/classification/classification_response.rbs +++ 
b/sig/mindee/v2/product/classification/classification_response.rbs @@ -5,7 +5,7 @@ module Mindee module Product module Classification class ClassificationResponse - self.@_slug: String + self.@slug: String self.@_params_type: singleton(Params::ClassificationParameters) attr_reader inference: Mindee::V2::Product::Classification::ClassificationInference @@ -15,7 +15,7 @@ module Mindee def to_s: -> String def self._params_type: () -> singleton(Params::ClassificationParameters) - def self._slug: () -> String + def self.slug: () -> String end end end diff --git a/sig/mindee/product/classification/classification_result.rbs b/sig/mindee/v2/product/classification/classification_result.rbs similarity index 100% rename from sig/mindee/product/classification/classification_result.rbs rename to sig/mindee/v2/product/classification/classification_result.rbs diff --git a/sig/mindee/v2/product/classification/params/classification_parameters/classification_parameters.rbs b/sig/mindee/v2/product/classification/params/classification_parameters/classification_parameters.rbs index 113e22e7..57b319bb 100644 --- a/sig/mindee/v2/product/classification/params/classification_parameters/classification_parameters.rbs +++ b/sig/mindee/v2/product/classification/params/classification_parameters/classification_parameters.rbs @@ -4,7 +4,7 @@ module Mindee module Classification module Params class ClassificationParameters - def self._slug: -> String + def self.slug: -> String def self.from_hash: (params: Hash[String | Symbol, untyped]) -> ClassificationParameters diff --git a/sig/mindee/v2/product/crop/crop.rbs b/sig/mindee/v2/product/crop/crop.rbs new file mode 100644 index 00000000..13774685 --- /dev/null +++ b/sig/mindee/v2/product/crop/crop.rbs @@ -0,0 +1,10 @@ +module Mindee + module V2 + module Product + module Crop + class Crop < BaseProduct + end + end + end + end +end diff --git a/sig/mindee/v2/product/crop/crop_response.rbs b/sig/mindee/v2/product/crop/crop_response.rbs index 
1e5663ef..4b0c638f 100644 --- a/sig/mindee/v2/product/crop/crop_response.rbs +++ b/sig/mindee/v2/product/crop/crop_response.rbs @@ -5,7 +5,7 @@ module Mindee module Product module Crop class CropResponse - self.@_slug: String + self.@slug: String self.@_params_type: singleton(Params::CropParameters) attr_reader inference: Mindee::V2::Product::Crop::CropInference @@ -15,7 +15,7 @@ module Mindee def to_s: -> String def self._params_type: () -> singleton(Params::CropParameters) - def self._slug: () -> String + def self.slug: () -> String end end end diff --git a/sig/mindee/v2/product/crop/params/crop_parameters/crop_parameters.rbs b/sig/mindee/v2/product/crop/params/crop_parameters/crop_parameters.rbs index 8e7a42a5..1aad2a97 100644 --- a/sig/mindee/v2/product/crop/params/crop_parameters/crop_parameters.rbs +++ b/sig/mindee/v2/product/crop/params/crop_parameters/crop_parameters.rbs @@ -4,7 +4,7 @@ module Mindee module Crop module Params class CropParameters - def self._slug: -> String + def self.slug: -> String def self.from_hash: (params: Hash[String | Symbol, untyped]) -> CropParameters diff --git a/sig/mindee/v2/product/ocr/ocr.rbs b/sig/mindee/v2/product/ocr/ocr.rbs new file mode 100644 index 00000000..9bbc96d0 --- /dev/null +++ b/sig/mindee/v2/product/ocr/ocr.rbs @@ -0,0 +1,10 @@ +module Mindee + module V2 + module Product + module Ocr + class Ocr < BaseProduct + end + end + end + end +end diff --git a/sig/mindee/v2/product/ocr/ocr_response.rbs b/sig/mindee/v2/product/ocr/ocr_response.rbs index 4841dca2..609aef0e 100644 --- a/sig/mindee/v2/product/ocr/ocr_response.rbs +++ b/sig/mindee/v2/product/ocr/ocr_response.rbs @@ -5,7 +5,7 @@ module Mindee module Product module Ocr class OcrResponse - self.@_slug: String + self.@slug: String self.@_params_type: singleton(Params::OcrParameters) attr_reader inference: Mindee::V2::Product::Ocr::OcrInference @@ -15,7 +15,7 @@ module Mindee def to_s: -> String def self._params_type: () -> singleton(Params::OcrParameters) - def 
self._slug: () -> String + def self.slug: () -> String end end end diff --git a/sig/mindee/v2/product/ocr/params/ocr_parameters/ocr_parameters.rbs b/sig/mindee/v2/product/ocr/params/ocr_parameters/ocr_parameters.rbs index b33cbaa2..73172082 100644 --- a/sig/mindee/v2/product/ocr/params/ocr_parameters/ocr_parameters.rbs +++ b/sig/mindee/v2/product/ocr/params/ocr_parameters/ocr_parameters.rbs @@ -4,7 +4,7 @@ module Mindee module Ocr module Params class OcrParameters - def self._slug: -> String + def self.slug: -> String def self.from_hash: (params: Hash[String | Symbol, untyped]) -> OcrParameters diff --git a/sig/mindee/v2/product/split/params/split_parameters/split_parameters.rbs b/sig/mindee/v2/product/split/params/split_parameters/split_parameters.rbs index a17053af..7011b3ca 100644 --- a/sig/mindee/v2/product/split/params/split_parameters/split_parameters.rbs +++ b/sig/mindee/v2/product/split/params/split_parameters/split_parameters.rbs @@ -4,7 +4,7 @@ module Mindee module Split module Params class SplitParameters - def self._slug: -> String + def self.slug: -> String def self.from_hash: (params: Hash[String | Symbol, untyped]) -> SplitParameters diff --git a/sig/mindee/v2/product/split/split.rbs b/sig/mindee/v2/product/split/split.rbs new file mode 100644 index 00000000..835a2de1 --- /dev/null +++ b/sig/mindee/v2/product/split/split.rbs @@ -0,0 +1,10 @@ +module Mindee + module V2 + module Product + module Split + class Split < BaseProduct + end + end + end + end +end diff --git a/sig/mindee/v2/product/split/split_response.rbs b/sig/mindee/v2/product/split/split_response.rbs index fd6ea7a3..9068eeb5 100644 --- a/sig/mindee/v2/product/split/split_response.rbs +++ b/sig/mindee/v2/product/split/split_response.rbs @@ -5,7 +5,7 @@ module Mindee module Product module Split class SplitResponse - self.@_slug: String + self.@slug: String self.@_params_type: singleton(Params::SplitParameters) attr_reader inference: Mindee::V2::Product::Split::SplitInference @@ -15,7 +15,7 
@@ module Mindee def to_s: -> String def self._params_type: () -> singleton(Params::SplitParameters) - def self._slug: () -> String + def self.slug: () -> String end end end diff --git a/spec/v2/product/classification/classification_integration.rb b/spec/v2/product/classification/classification_integration.rb index c44addd4..e0c0b326 100644 --- a/spec/v2/product/classification/classification_integration.rb +++ b/spec/v2/product/classification/classification_integration.rb @@ -17,10 +17,10 @@ File.join(V2_PRODUCT_DATA_DIR, 'classification', 'default_invoice.jpg') ) - params = Mindee::V2::Product::Classification::Params::ClassificationParameters.new(classification_model_id) + params = { model_id: classification_model_id } response = v2_client.enqueue_and_get_result( - Mindee::V2::Product::Classification::ClassificationResponse, + Mindee::V2::Product::Classification::Classification, input_source, params ) diff --git a/spec/v2/product/crop/crop_integration.rb b/spec/v2/product/crop/crop_integration.rb index 537e17a7..d1e435c3 100644 --- a/spec/v2/product/crop/crop_integration.rb +++ b/spec/v2/product/crop/crop_integration.rb @@ -17,10 +17,10 @@ File.join(V2_PRODUCT_DATA_DIR, 'crop', 'default_sample.jpg') ) - params = Mindee::V2::Product::Crop::Params::CropParameters.new(crop_model_id) + params = { model: crop_model_id } response = v2_client.enqueue_and_get_result( - Mindee::V2::Product::Crop::CropResponse, + Mindee::V2::Product::Crop::Crop, input_source, params ) diff --git a/spec/v2/product/ocr/ocr_integration.rb b/spec/v2/product/ocr/ocr_integration.rb index 71ff595f..53c8c972 100644 --- a/spec/v2/product/ocr/ocr_integration.rb +++ b/spec/v2/product/ocr/ocr_integration.rb @@ -17,10 +17,10 @@ File.join(V2_PRODUCT_DATA_DIR, 'ocr', 'default_sample.jpg') ) - params = Mindee::V2::Product::Ocr::Params::OcrParameters.new(ocr_model_id) + params = { model: ocr_model_id } response = v2_client.enqueue_and_get_result( - Mindee::V2::Product::Ocr::OcrResponse, + 
Mindee::V2::Product::Ocr::Ocr, input_source, params ) diff --git a/spec/v2/product/split/split_integration.rb b/spec/v2/product/split/split_integration.rb index 5729213c..4f25a89a 100644 --- a/spec/v2/product/split/split_integration.rb +++ b/spec/v2/product/split/split_integration.rb @@ -17,10 +17,10 @@ File.join(V2_PRODUCT_DATA_DIR, 'split', 'default_sample.pdf') ) - params = Mindee::V2::Product::Split::Params::SplitParameters.new(split_model_id) + params = { model: split_model_id } response = v2_client.enqueue_and_get_result( - Mindee::V2::Product::Split::SplitResponse, + Mindee::V2::Product::Split::Split, input_source, params ) diff --git a/spec/v2/product/split/split_spec.rb b/spec/v2/product/split/split_spec.rb index 7a0285bc..e946ee28 100644 --- a/spec/v2/product/split/split_spec.rb +++ b/spec/v2/product/split/split_spec.rb @@ -2,8 +2,9 @@ require 'json' require 'mindee' +require 'mindee/v2/product' -describe Mindee::V2::Product::Split::SplitResponse, :v2 do +describe Mindee::V2::Product::Split::Split, :v2 do let(:split_data_dir) { File.join(V2_PRODUCT_DATA_DIR, 'split') } it 'parses a single split properly' do From 3d35c1c54f467c046fbc5954b18eaaf38c320c56 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Fri, 27 Feb 2026 12:48:23 +0100 Subject: [PATCH 21/23] fixes --- docs/code_samples/v2_extraction.txt | 3 +- docs/code_samples/v2_extraction_webhook.txt | 3 +- lib/mindee/client_v2.rb | 40 +++++++++++++-------- lib/mindee/http/mindee_api_v2.rb | 18 +++++----- sig/mindee/client_v2.rbs | 8 ++--- sig/mindee/http/mindee_api_v2.rbs | 4 +-- spec/v2/product/crop/crop_integration.rb | 2 +- spec/v2/product/ocr/ocr_integration.rb | 2 +- spec/v2/product/split/split_integration.rb | 2 +- 9 files changed, 47 insertions(+), 35 deletions(-) diff --git a/docs/code_samples/v2_extraction.txt b/docs/code_samples/v2_extraction.txt index 366c53cf..50173982 100644 --- a/docs/code_samples/v2_extraction.txt +++ 
b/docs/code_samples/v2_extraction.txt @@ -31,7 +31,8 @@ inference_params = Mindee::Input::InferenceParameters.new( input_source = Mindee::Input::Source::PathInputSource.new(input_path) # Send for processing -response = mindee_client.enqueue_and_get_inference( +response = mindee_client.enqueue_and_get_result( + Mindee::Parsing::V2::Inference, input_source, inference_params # This parameter can also be provided as a Hash. ) diff --git a/docs/code_samples/v2_extraction_webhook.txt b/docs/code_samples/v2_extraction_webhook.txt index ae641d23..3cc6b2fb 100644 --- a/docs/code_samples/v2_extraction_webhook.txt +++ b/docs/code_samples/v2_extraction_webhook.txt @@ -32,7 +32,8 @@ inference_params = Mindee::Input::InferenceParameters.new( input_source = Mindee::Input::Source::PathInputSource.new(input_path) # Send for processing -response = mindee_client.enqueue_inference( +response = mindee_client.enqueue( + Mindee::Parsing::V2::Inference, input_source, inference_params # This parameter can also be provided as a Hash. ) diff --git a/lib/mindee/client_v2.rb b/lib/mindee/client_v2.rb index 7cd7090c..39def57c 100644 --- a/lib/mindee/client_v2.rb +++ b/lib/mindee/client_v2.rb @@ -27,11 +27,11 @@ def get_inference(inference_id) end # Retrieves a result from a given queue or URL to the result. - # @param product_type [Class] The return class. + # @param product [Class] The return class. # @param resource [String] ID of the inference or URL to the result. # @return [Mindee::Parsing::V2::BaseResponse] - def get_result(product_type, resource) - @mindee_api.req_get_result(product_type, resource) + def get_result(product, resource) + @mindee_api.req_get_result(product, resource) end # Retrieves an inference from a given queue or URL to the job. 
@@ -51,35 +51,42 @@ def enqueue_inference(input_source, params, disable_redundant_warnings: false) warn '[DEPRECATION] `enqueue_inference` is deprecated; use `enqueue` instead.', uplevel: 1 end normalized_params = normalize_parameters(Input::InferenceParameters, params) - enqueue(input_source, normalized_params) + enqueue(Mindee::Parsing::V2::Inference, input_source, normalized_params) end # Enqueue a document for async parsing. + # @param product [Class] The return class. # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource] # The source of the input document (local file or URL). - # @param params [BaseParameters] + # @param params [Hash, InferenceParameters] Parameters for the inference. # @return [Mindee::Parsing::V2::JobResponse] - def enqueue(input_source, params) - logger.debug("Enqueueing document to model '#{params.model_id}'.") + def enqueue( + product, + input_source, + params + ) + normalized_params = normalize_parameters(product.params_type, params) + normalized_params.validate_async_params + logger.debug("Enqueueing document to model '#{normalized_params.model_id}'.") - @mindee_api.req_post_enqueue(input_source, params) + @mindee_api.req_post_enqueue(input_source, normalized_params) end # Enqueues to an asynchronous endpoint and automatically polls for a response. # - # @param product_type [Class] The return class. + # @param product [Class] The return class. # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource] # The source of the input document (local file or URL). # @param params [Hash, InferenceParameters] Parameters for the inference. 
# @return [Mindee::Parsing::Common::ApiResponse] def enqueue_and_get_result( - product_type, + product, input_source, params ) - normalized_params = normalize_parameters(product_type.params_type, params) + enqueue_response = enqueue(product, input_source, params) + normalized_params = normalize_parameters(product.params_type, params) normalized_params.validate_async_params - enqueue_response = enqueue(input_source, normalized_params) if enqueue_response.job.id.nil? || enqueue_response.job.id.empty? logger.error("Failed enqueueing:\n#{enqueue_response.raw_http}") @@ -97,7 +104,7 @@ def enqueue_and_get_result( if poll_results.job.status == 'Failed' break elsif !poll_results.job.result_url.nil? - return get_result(product_type, poll_results.job.result_url) + return get_result(product, poll_results.job.result_url) end logger.debug( @@ -128,8 +135,11 @@ def enqueue_and_get_result( # The source of the input document (local file or URL). # @param params [Hash, InferenceParameters] Parameters for the inference. # @return [Mindee::Parsing::V2::InferenceResponse] - def enqueue_and_get_inference(input_source, params) - warn '[DEPRECATION] `enqueue_and_get_inference` is deprecated; use `enqueue_and_get_result` instead.' + def enqueue_and_get_inference(input_source, params, disable_redundant_warnings: false) + unless disable_redundant_warnings + warn '[DEPRECATION] `enqueue_and_get_inference` is deprecated; use `enqueue_and_get_result` instead.', + uplevel: 1 + end response = enqueue_and_get_result(Mindee::Parsing::V2::Inference, input_source, params) unless response.is_a?(Mindee::Parsing::V2::InferenceResponse) diff --git a/lib/mindee/http/mindee_api_v2.rb b/lib/mindee/http/mindee_api_v2.rb index 89488bdf..1c1d4b7e 100644 --- a/lib/mindee/http/mindee_api_v2.rb +++ b/lib/mindee/http/mindee_api_v2.rb @@ -20,7 +20,7 @@ def initialize(api_key: nil) # Sends a file to the inference queue. 
# # @param input_source [Input::Source::LocalInputSource, Input::Source::URLInputSource] - # @param params [Input::InferenceParameters] + # @param params [Input::BaseParameters] # @return [Mindee::Parsing::V2::JobResponse] # @raise [Mindee::Errors::MindeeHttpErrorV2] def req_post_enqueue(input_source, params) @@ -41,18 +41,18 @@ def req_get_inference(inference_id) end # Retrieves a result from a given queue. - # @param product_type [Class] The return class. + # @param product [Class] The return class. # @param resource [String] ID of the inference or URL to the result. # @return [Mindee::Parsing::V2::BaseResponse] - def req_get_result(product_type, resource) - return req_get_result_url(product_type.response_type, resource) if uri?(resource) + def req_get_result(product, resource) + return req_get_result_url(product.response_type, resource) if uri?(resource) @settings.check_api_key response = result_req_get( resource, - product_type + product ) - product_type.response_type.new(process_response(response)) + product.response_type.new(process_response(response)) end # Retrieves a queued job. @@ -156,10 +156,10 @@ def inference_result_req_get(queue_id) # Polls the API for the result of an inference. # # @param queue_id [String] ID of the queue. - # @param product_type [Class] The return class. + # @param product [Class] The return class. 
# @return [Net::HTTPResponse] - def result_req_get(queue_id, product_type) - poll("#{@settings.base_url}/products/#{product_type.slug}/results/#{queue_id}") + def result_req_get(queue_id, product) + poll("#{@settings.base_url}/products/#{product.slug}/results/#{queue_id}") end # Handle parameters for the enqueue form diff --git a/sig/mindee/client_v2.rbs b/sig/mindee/client_v2.rbs index ff4c20ef..66b5409c 100644 --- a/sig/mindee/client_v2.rbs +++ b/sig/mindee/client_v2.rbs @@ -8,13 +8,13 @@ module Mindee def logger: () -> Logger def initialize: (?api_key: String) -> void def get_inference: (String inference_id) -> Parsing::V2::InferenceResponse - def get_result: [T] (HTTP::_ProductClass[T] product_type, String resource) -> T + def get_result: [T] (HTTP::_ProductClass[T] product, String resource) -> T def get_result_url: [T] (HTTP::_ResponseFactory[T] response_class, String inference_id) -> T def get_job: (String job_id) -> Parsing::V2::JobResponse def enqueue_inference: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | Input::InferenceParameters params, ?disable_redundant_warnings: bool) -> Parsing::V2::JobResponse - def enqueue: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Input::BaseParameters params) -> Parsing::V2::JobResponse - def enqueue_and_get_result: [T] (HTTP::_ProductClass[T] product_type, Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | Input::InferenceParameters params) -> T - def enqueue_and_get_inference: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | Input::InferenceParameters params) -> Parsing::V2::InferenceResponse + def enqueue: [T] (HTTP::_ProductClass[T] product, Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | Input::BaseParameters params) -> Parsing::V2::JobResponse + def enqueue_and_get_result: [T] (HTTP::_ProductClass[T] 
product, Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | Input::BaseParameters params) -> T + def enqueue_and_get_inference: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Hash[String | Symbol, untyped] | Input::InferenceParameters params, ?disable_redundant_warnings: bool) -> Parsing::V2::InferenceResponse def validate_async_params: (Integer | Float, Integer | Float, Integer) -> void def normalize_parameters: (singleton(Input::BaseParameters) param_class, Hash[String | Symbol, untyped] | Input::BaseParameters params) -> Input::BaseParameters end diff --git a/sig/mindee/http/mindee_api_v2.rbs b/sig/mindee/http/mindee_api_v2.rbs index 62872370..c67b09b3 100644 --- a/sig/mindee/http/mindee_api_v2.rbs +++ b/sig/mindee/http/mindee_api_v2.rbs @@ -16,7 +16,7 @@ module Mindee def initialize: (?api_key: String?) -> void - def req_get_result: [T] (_ProductClass[T] product_type, String resource) -> T + def req_get_result: [T] (_ProductClass[T] product, String resource) -> T def req_post_enqueue: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Input::BaseParameters) -> Parsing::V2::JobResponse def req_get_inference: (String) -> Parsing::V2::InferenceResponse @@ -25,7 +25,7 @@ module Mindee def poll: (String) -> Net::HTTPResponse def inference_job_req_get: (String) -> Net::HTTPResponse def inference_result_req_get: (String) -> Net::HTTPResponse - def result_req_get: [T] (String, _ProductClass[T] product_type) -> Net::HTTPResponse + def result_req_get: [T] (String, _ProductClass[T] product) -> Net::HTTPResponse def enqueue: (Input::Source::LocalInputSource | Input::Source::URLInputSource, Input::BaseParameters) -> Net::HTTPResponse? 
private diff --git a/spec/v2/product/crop/crop_integration.rb b/spec/v2/product/crop/crop_integration.rb index d1e435c3..f8095c56 100644 --- a/spec/v2/product/crop/crop_integration.rb +++ b/spec/v2/product/crop/crop_integration.rb @@ -17,7 +17,7 @@ File.join(V2_PRODUCT_DATA_DIR, 'crop', 'default_sample.jpg') ) - params = { model: crop_model_id } + params = { model_id: crop_model_id } response = v2_client.enqueue_and_get_result( Mindee::V2::Product::Crop::Crop, diff --git a/spec/v2/product/ocr/ocr_integration.rb b/spec/v2/product/ocr/ocr_integration.rb index 53c8c972..b12c3e90 100644 --- a/spec/v2/product/ocr/ocr_integration.rb +++ b/spec/v2/product/ocr/ocr_integration.rb @@ -17,7 +17,7 @@ File.join(V2_PRODUCT_DATA_DIR, 'ocr', 'default_sample.jpg') ) - params = { model: ocr_model_id } + params = { model_id: ocr_model_id } response = v2_client.enqueue_and_get_result( Mindee::V2::Product::Ocr::Ocr, diff --git a/spec/v2/product/split/split_integration.rb b/spec/v2/product/split/split_integration.rb index 4f25a89a..adbfc852 100644 --- a/spec/v2/product/split/split_integration.rb +++ b/spec/v2/product/split/split_integration.rb @@ -17,7 +17,7 @@ File.join(V2_PRODUCT_DATA_DIR, 'split', 'default_sample.pdf') ) - params = { model: split_model_id } + params = { model_id: split_model_id } response = v2_client.enqueue_and_get_result( Mindee::V2::Product::Split::Split, From 8b5abd0780c42971486e4ce8c46e3cbd2164fb29 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Fri, 27 Feb 2026 14:38:31 +0100 Subject: [PATCH 22/23] apply fixes --- lib/mindee/input/local_response.rb | 7 +- lib/mindee/v2/product.rb | 1 + .../v2/product/extraction/extraction.rb | 19 + .../extraction/extraction_inference.rb | 25 ++ .../product/extraction/extraction_response.rb | 23 ++ .../product/extraction/extraction_result.rb | 15 + .../params/extraction_parameters.rb | 18 + sig/mindee/input/local_response.rbs | 2 +- 
.../v2/product/extraction/extraction.rbs | 12 + .../extraction/extraction_inference.rbs | 14 + .../extraction/extraction_response.rbs | 18 + .../product/extraction/extraction_result.rbs | 10 + .../params/extraction_parameters.rbs | 12 + .../classification/classification_spec.rb | 2 +- spec/v2/product/crop/crop_spec.rb | 2 +- spec/v2/product/extraction/extraction_spec.rb | 389 ++++++++++++++++++ spec/v2/product/ocr/ocr_spec.rb | 2 +- 17 files changed, 564 insertions(+), 7 deletions(-) create mode 100644 lib/mindee/v2/product/extraction/extraction.rb create mode 100644 lib/mindee/v2/product/extraction/extraction_inference.rb create mode 100644 lib/mindee/v2/product/extraction/extraction_response.rb create mode 100644 lib/mindee/v2/product/extraction/extraction_result.rb create mode 100644 lib/mindee/v2/product/extraction/params/extraction_parameters.rb create mode 100644 sig/mindee/v2/product/extraction/extraction.rbs create mode 100644 sig/mindee/v2/product/extraction/extraction_inference.rbs create mode 100644 sig/mindee/v2/product/extraction/extraction_response.rbs create mode 100644 sig/mindee/v2/product/extraction/extraction_result.rbs create mode 100644 sig/mindee/v2/product/extraction/params/extraction_parameters.rbs create mode 100644 spec/v2/product/extraction/extraction_spec.rb diff --git a/lib/mindee/input/local_response.rb b/lib/mindee/input/local_response.rb index 7d4b5779..5e6c1440 100644 --- a/lib/mindee/input/local_response.rb +++ b/lib/mindee/input/local_response.rb @@ -35,7 +35,7 @@ def initialize(input_file) # @return [Hash] def as_hash @file.rewind - file_str = @file.read + file_str = @file.read or raise 'File could not be read' JSON.parse(file_str, object_class: Hash) rescue JSON::ParserError raise Errors::MindeeInputError, "File is not a valid dict. 
#{file_str}" @@ -54,7 +54,8 @@ def get_hmac_signature(secret_key) algorithm = OpenSSL::Digest.new('sha256') begin @file.rewind - mac = OpenSSL::HMAC.hexdigest(algorithm, self.class.process_secret_key(secret_key), @file.read) + mac = OpenSSL::HMAC.hexdigest(algorithm, self.class.process_secret_key(secret_key), + @file.read || raise('File could not be read')) rescue StandardError raise Errors::MindeeInputError, 'Could not get HMAC signature from payload.' end @@ -70,7 +71,7 @@ def valid_hmac_signature?(secret_key, signature) # Deserializes a loaded response # @param response_class [Parsing::V2::CommonResponse] class to return. - # @return [Parsing::V2::JobResponse, Mindee::Parsing::V2::InferenceResponse] + # @return [Parsing::V2::JobResponse, Mindee::V2::Parsing::CommonResponse] def deserialize_response(response_class) response_class.new(as_hash) # : Mindee::Parsing::V2::JobResponse | Mindee::Parsing::V2::InferenceResponse rescue StandardError diff --git a/lib/mindee/v2/product.rb b/lib/mindee/v2/product.rb index 561b8af5..bc409f7e 100644 --- a/lib/mindee/v2/product.rb +++ b/lib/mindee/v2/product.rb @@ -2,5 +2,6 @@ require_relative 'product/classification/classification' require_relative 'product/crop/crop' +require_relative 'product/extraction/extraction' require_relative 'product/ocr/ocr' require_relative 'product/split/split' diff --git a/lib/mindee/v2/product/extraction/extraction.rb b/lib/mindee/v2/product/extraction/extraction.rb new file mode 100644 index 00000000..c01afee3 --- /dev/null +++ b/lib/mindee/v2/product/extraction/extraction.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +require_relative 'extraction_response' +require_relative 'params/extraction_parameters' + +module Mindee + module V2 + module Product + module Extraction + # Extraction product. + # Note: currently a placeholder for the `Inference` class. 
+ class Extraction < Mindee::Parsing::V2::Inference + @params_type = Product::Extraction::Params::ExtractionParameters + @response_type = Product::Extraction::ExtractionResponse + end + end + end + end +end diff --git a/lib/mindee/v2/product/extraction/extraction_inference.rb b/lib/mindee/v2/product/extraction/extraction_inference.rb new file mode 100644 index 00000000..08f66d17 --- /dev/null +++ b/lib/mindee/v2/product/extraction/extraction_inference.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +require_relative '../../../parsing/v2/inference' +require_relative 'extraction_result' + +module Mindee + module V2 + module Product + module Extraction + # Extraction inference. + class ExtractionInference < Mindee::Parsing::V2::Inference + # @return [ExtractionResult] Parsed inference payload. + attr_reader :result + + # @param server_response [Hash] Hash representation of the JSON returned by the service. + def initialize(server_response) + super + + @result = Mindee::V2::Product::Extraction::ExtractionResult.new(server_response['result']) + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/extraction/extraction_response.rb b/lib/mindee/v2/product/extraction/extraction_response.rb new file mode 100644 index 00000000..5b9f29b7 --- /dev/null +++ b/lib/mindee/v2/product/extraction/extraction_response.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +require_relative '../../../parsing/v2/inference_response' +require_relative 'extraction_inference' + +module Mindee + module V2 + module Product + module Extraction + # HTTP response wrapper that embeds a V2 Inference. + class ExtractionResponse < Mindee::Parsing::V2::InferenceResponse + # @return [ExtractionInference] Parsed inference payload. 
+ attr_reader :inference + + def initialize(server_response) + super + @inference = Mindee::V2::Product::Extraction::ExtractionInference.new(server_response['inference']) + end + end + end + end + end +end diff --git a/lib/mindee/v2/product/extraction/extraction_result.rb b/lib/mindee/v2/product/extraction/extraction_result.rb new file mode 100644 index 00000000..ac080ea3 --- /dev/null +++ b/lib/mindee/v2/product/extraction/extraction_result.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +require_relative '../../../parsing/v2/inference_result' + +module Mindee + module V2 + module Product + module Extraction + # Result of an extraction utility inference. + class ExtractionResult < Mindee::Parsing::V2::InferenceResult + end + end + end + end +end diff --git a/lib/mindee/v2/product/extraction/params/extraction_parameters.rb b/lib/mindee/v2/product/extraction/params/extraction_parameters.rb new file mode 100644 index 00000000..daabbc72 --- /dev/null +++ b/lib/mindee/v2/product/extraction/params/extraction_parameters.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true + +require_relative '../../../../input/inference_parameters' + +module Mindee + module V2 + module Product + module Extraction + module Params + # Parameters accepted by the extraction v2 endpoint. + # Currently a placeholder for the InferenceParameters class. 
+ class ExtractionParameters < Input::InferenceParameters + end + end + end + end + end +end diff --git a/sig/mindee/input/local_response.rbs b/sig/mindee/input/local_response.rbs index 5de48877..4f0468fb 100644 --- a/sig/mindee/input/local_response.rbs +++ b/sig/mindee/input/local_response.rbs @@ -2,7 +2,7 @@ module Mindee module Input class LocalResponse - def file: -> StringIO + attr_reader file: StringIO def initialize: (File | IO | StringIO | String | Pathname | Tempfile) -> void def as_hash: -> Hash[String | Symbol, untyped] def self.process_secret_key: (String) -> String diff --git a/sig/mindee/v2/product/extraction/extraction.rbs b/sig/mindee/v2/product/extraction/extraction.rbs new file mode 100644 index 00000000..c747c7b8 --- /dev/null +++ b/sig/mindee/v2/product/extraction/extraction.rbs @@ -0,0 +1,12 @@ +module Mindee + module V2 + module Product + module Extraction + class Extraction + self.@params_type: singleton(Mindee::V2::Product::Extraction::Params::ExtractionParameters) + self.@response_type: singleton(Mindee::V2::Product::Extraction::ExtractionResponse) + end + end + end + end +end diff --git a/sig/mindee/v2/product/extraction/extraction_inference.rbs b/sig/mindee/v2/product/extraction/extraction_inference.rbs new file mode 100644 index 00000000..ffefa44b --- /dev/null +++ b/sig/mindee/v2/product/extraction/extraction_inference.rbs @@ -0,0 +1,14 @@ +module Mindee + module V2 + module Product + module Extraction + class ExtractionInference + attr_reader result: ExtractionResult + + def initialize: (Hash[String | Symbol, untyped]) -> void + def to_s: -> String + end + end + end + end +end diff --git a/sig/mindee/v2/product/extraction/extraction_response.rbs b/sig/mindee/v2/product/extraction/extraction_response.rbs new file mode 100644 index 00000000..000d7f82 --- /dev/null +++ b/sig/mindee/v2/product/extraction/extraction_response.rbs @@ -0,0 +1,18 @@ +module Mindee + module V2 + module Product + module Extraction + class ExtractionResponse + 
self.@_params_type: singleton(Params::ExtractionParameters) + attr_reader inference: Mindee::V2::Product::Extraction::ExtractionInference + + def initialize: (Hash[String | Symbol, untyped]) -> void + + def _params_type: -> singleton(Params::ExtractionParameters) + + def self._params_type: () -> singleton(Params::ExtractionParameters) + end + end + end + end +end diff --git a/sig/mindee/v2/product/extraction/extraction_result.rbs b/sig/mindee/v2/product/extraction/extraction_result.rbs new file mode 100644 index 00000000..975d364c --- /dev/null +++ b/sig/mindee/v2/product/extraction/extraction_result.rbs @@ -0,0 +1,10 @@ +module Mindee + module V2 + module Product + module Extraction + class ExtractionResult < Mindee::Parsing::V2::InferenceResult + end + end + end + end +end diff --git a/sig/mindee/v2/product/extraction/params/extraction_parameters.rbs b/sig/mindee/v2/product/extraction/params/extraction_parameters.rbs new file mode 100644 index 00000000..f93bcdae --- /dev/null +++ b/sig/mindee/v2/product/extraction/params/extraction_parameters.rbs @@ -0,0 +1,12 @@ +module Mindee + module V2 + module Product + module Extraction + module Params + class ExtractionParameters < Input::InferenceParameters + end + end + end + end + end +end diff --git a/spec/v2/product/classification/classification_spec.rb b/spec/v2/product/classification/classification_spec.rb index 12dd00ab..b27345ae 100644 --- a/spec/v2/product/classification/classification_spec.rb +++ b/spec/v2/product/classification/classification_spec.rb @@ -3,7 +3,7 @@ require 'json' require 'mindee/v2/product' -describe Mindee::V2::Product::Classification::ClassificationResponse, :v2 do +describe Mindee::V2::Product::Classification::Classification, :v2 do let(:classification_data_dir) { File.join(V2_PRODUCT_DATA_DIR, 'classification') } it 'parses a single classification properly' do diff --git a/spec/v2/product/crop/crop_spec.rb b/spec/v2/product/crop/crop_spec.rb index c6a6ce53..2ab6e1f5 100644 --- 
a/spec/v2/product/crop/crop_spec.rb +++ b/spec/v2/product/crop/crop_spec.rb @@ -5,7 +5,7 @@ require_relative '../../../data' -describe Mindee::V2::Product::Crop::CropResponse do +describe Mindee::V2::Product::Crop::Crop do let(:crop_data_dir) { File.join(V2_PRODUCT_DATA_DIR, 'crop') } it 'parses a single crop properly' do diff --git a/spec/v2/product/extraction/extraction_spec.rb b/spec/v2/product/extraction/extraction_spec.rb new file mode 100644 index 00000000..352a822f --- /dev/null +++ b/spec/v2/product/extraction/extraction_spec.rb @@ -0,0 +1,389 @@ +# frozen_string_literal: true + +require 'mindee' +require 'mindee/input/local_response' +require 'mindee/v2/product' + +describe 'extraction' do + let(:findoc_path) { File.join(V2_PRODUCT_DATA_DIR, 'extraction', 'financial_document') } + let(:extraction_path) { File.join(V2_PRODUCT_DATA_DIR, 'extraction') } + let(:deep_nested_field_path) { File.join(extraction_path, 'deep_nested_fields.json') } + let(:standard_field_path) { File.join(extraction_path, 'standard_field_types.json') } + let(:standard_field_rst_path) { File.join(extraction_path, 'standard_field_types.rst') } + let(:location_field_path) { File.join(findoc_path, 'complete_with_coordinates.json') } + let(:raw_text_json_path) { File.join(extraction_path, 'raw_texts.json') } + let(:raw_text_str_path) { File.join(extraction_path, 'raw_texts.txt') } + let(:blank_path) { File.join(findoc_path, 'blank.json') } + let(:complete_path) { File.join(findoc_path, 'complete.json') } + let(:rag_matched_path) { File.join(extraction_path, 'rag_matched.json') } + let(:rag_not_matched_path) { File.join(extraction_path, 'rag_not_matched.json') } + let(:text_context_path) { File.join(extraction_path, 'text_context_enabled.json') } + + def load_v2_extraction_inference(resource_path) + local_response = Mindee::Input::LocalResponse.new(resource_path) + local_response.deserialize_response(Mindee::V2::Product::Extraction::ExtractionResponse) + end + + simple_field = 
Mindee::Parsing::V2::Field::SimpleField + object_field = Mindee::Parsing::V2::Field::ObjectField + list_field = Mindee::Parsing::V2::Field::ListField + field_confidence = Mindee::Parsing::V2::Field::FieldConfidence + + describe 'simple' do + it 'loads a blank extraction inference with valid properties' do + response = load_v2_extraction_inference(blank_path) + + fields = response.inference.result.fields + expect(fields).not_to be_empty + expect(fields).to be_a(Mindee::Parsing::V2::Field::InferenceFields) + expect(fields.size).to eq(21) + + expect(fields).to have_key('taxes') + expect(fields['taxes']).not_to be_nil + expect(fields['taxes']).to be_a(list_field) + + expect(fields['supplier_address']).not_to be_nil + expect(fields['supplier_address']).to be_a(object_field) + + fields.each_value do |entry| + next if entry.is_a?(simple_field) && entry.value.nil? + + case entry + when simple_field + expect(entry.value).not_to be_nil + when object_field + expect(entry.fields).not_to be_nil + when list_field + expect(entry.items).not_to be_nil + else + raise "Unknown field type: #{entry.class}" + end + end + end + + it 'loads a complete inference with valid properties' do + response = load_v2_extraction_inference(complete_path) + inference = response.inference + job = inference.job + expect(job).not_to be_nil + expect(job).to be_a(Mindee::Parsing::V2::InferenceJob) + expect(job.id).to eq('12345678-1234-1234-1234-jobid1234567') + + expect(inference).not_to be_nil + expect(inference.id).to eq('12345678-1234-1234-1234-123456789abc') + + model = inference.model + expect(model).not_to be_nil + expect(model.id).to eq('12345678-1234-1234-1234-123456789abc') + + file = inference.file + expect(file).not_to be_nil + expect(file.name).to eq('complete.jpg') + expect(file.file_alias).to be_nil + expect(file.page_count).to eq(1) + expect(file.mime_type).to eq('image/jpeg') + + active_options = inference.active_options + expect(active_options).not_to be_nil + 
expect(active_options.raw_text).to eq(false) + expect(active_options.polygon).to eq(false) + expect(active_options.confidence).to eq(false) + expect(active_options.text_context).to eq(false) + expect(active_options.rag).to eq(false) + + fields = inference.result.fields + expect(fields).not_to be_empty + expect(fields.size).to eq(21) + + date_field = fields.get_simple_field('date') + expect(date_field).to be_a(simple_field) + expect(date_field.value).to eq('2019-11-02') + + expect(fields).to have_key('taxes') + taxes = fields.get_list_field('taxes') + expect(taxes).to be_a(list_field) + + expect(taxes.items.length).to eq(1) + expect(taxes.to_s).to be_a(String) + expect(taxes.to_s).to_not be_empty + + first_tax_item = taxes.items.first + expect(first_tax_item).to be_a(object_field) + + tax_item_obj = first_tax_item + expect(tax_item_obj.fields.size).to eq(3) + + expect(fields).to have_key('line_items') + line_items = fields.get_list_field('line_items') + expect(line_items).not_to be_nil + expect(line_items).to be_a(list_field) + first_line_item = line_items.object_items[0] + expect(first_line_item).to be_a(object_field) + expect(first_line_item.get_simple_field('quantity').value).to eq(1.0) + + base_field = tax_item_obj.fields.get_simple_field('base') + expect(base_field).to be_a(simple_field) + expect(base_field.value).to eq(31.5) + + expect(fields).to have_key('supplier_address') + supplier_address = fields.get_object_field('supplier_address') + expect(supplier_address).to be_a(object_field) + expect(supplier_address.to_s).to be_a(String) + expect(supplier_address.to_s).to_not be_empty + + country_field = supplier_address.fields.get_simple_field('country') + expect(country_field).to be_a(simple_field) + expect(country_field.value).to eq('USA') + expect(country_field.to_s).to eq('USA') + + customer_addr = fields.get_object_field('customer_address') + expect(customer_addr).to be_a(object_field) + city_field = customer_addr.fields.get_simple_field('city') + 
expect(city_field).to be_a(simple_field) + expect(city_field.value).to eq('New York') + + expect(inference.result.raw_text).to be_nil + end + end + + describe 'nested' do + it 'loads a deep nested object' do + response = load_v2_extraction_inference(deep_nested_field_path) + fields = response.inference.result.fields + + expect(fields['field_simple']).to be_a(simple_field) + expect(fields['field_object']).to be_a(object_field) + + field_object = fields.get_object_field('field_object') + expect(field_object.get_simple_field('sub_object_simple')).to be_a(simple_field) + expect(field_object.get_list_field('sub_object_list')).to be_a(list_field) + expect(field_object.get_object_field('sub_object_object')).to be_a(object_field) + expect(field_object.simple_fields.length).to eq(1) + expect(field_object.list_fields.length).to eq(1) + expect(field_object.object_fields.length).to eq(1) + lvl1 = field_object.fields + expect(lvl1['sub_object_list']).to be_a(list_field) + expect(lvl1['sub_object_list'].items).not_to be_empty + expect(lvl1['sub_object_list'].items[0]).to be_a(object_field) + expect(lvl1['sub_object_object']).to be_a(object_field) + + sub_object_object = lvl1.get_object_field('sub_object_object') + lvl2 = sub_object_object.fields + + expect(lvl2['sub_object_object_sub_object_list']).to be_a(list_field) + + nested_list = lvl2.get_list_field('sub_object_object_sub_object_list') + expect(nested_list.items).not_to be_empty + expect(nested_list.items.first).to be_a(object_field) + + first_item_obj = nested_list.items.first + deep_simple = first_item_obj.fields['sub_object_object_sub_object_list_simple'] + + expect(deep_simple).to be_a(simple_field) + expect(deep_simple.value).to eq('value_9') + end + end + + describe 'standard field types' do + def load_standard_fields + response = load_v2_extraction_inference(standard_field_path) + + active_options = response.inference.active_options + expect(active_options).not_to be_nil + expect(active_options.raw_text).to eq(true) 
+ + fields = response.inference.result.fields + expect(fields).to be_a(Mindee::Parsing::V2::Field::InferenceFields) + + fields + end + + it 'recognizes simple fields' do + fields = load_standard_fields + + # low-level access + expect(fields['field_simple_string']).to be_a(simple_field) + expect(fields.get('field_simple_string')).to be_a(simple_field) + + field_simple_string = fields.get_simple_field('field_simple_string') + expect(field_simple_string).to be_a(simple_field) + expect(field_simple_string.value).to eq('field_simple_string-value') + expect(field_simple_string.confidence).to eq(field_confidence::CERTAIN) + expect(field_simple_string.to_s).to eq('field_simple_string-value') + + field_simple_int = fields.get_simple_field('field_simple_int') + expect(field_simple_int).to be_a(simple_field) + expect(field_simple_int.value).to be_a(Float) + + field_simple_float = fields.get_simple_field('field_simple_float') + expect(field_simple_float).to be_a(simple_field) + expect(field_simple_float.value).to be_a(Float) + + field_simple_bool = fields.get_simple_field('field_simple_bool') + expect(field_simple_bool).to be_a(simple_field) + expect(field_simple_bool.value).to eq(true) + expect(field_simple_bool.to_s).to eq('True') + + field_simple_null = fields.get_simple_field('field_simple_null') + expect(field_simple_null).to be_a(simple_field) + expect(field_simple_null.value).to be_nil + expect(field_simple_null.to_s).to eq('') + end + + it 'recognizes simple list fields' do + fields = load_standard_fields + + # low-level access + expect(fields['field_simple_list']).to be_a(list_field) + expect(fields.get('field_simple_list')).to be_a(list_field) + + field_simple_list = fields.get_list_field('field_simple_list') + expect(field_simple_list).to be_a(list_field) + + expect(field_simple_list.items[0]).to be_a(simple_field) + expect(field_simple_list.simple_items[0]).to be_a(simple_field) + field_simple_list.simple_items.each do |entry| + expect(entry).to be_a(simple_field) 
+ expect(entry.value).not_to be_nil + end + end + + it 'recognizes object fields' do + fields = load_standard_fields + + # low-level access + expect(fields['field_object']).to be_a(object_field) + expect(fields.get('field_object')).to be_a(object_field) + + field_object = fields.get_object_field('field_object') + expect(field_object).to be_a(object_field) + expect(field_object.get_simple_field('subfield_1')).to be_a(simple_field) + field_object.fields.each_value do |entry| + expect(entry).to be_a(simple_field) + expect(entry.value).not_to be_nil + end + end + + it 'recognizes object list fields' do + fields = load_standard_fields + + # low-level access + expect(fields['field_object_list']).to be_a(list_field) + expect(fields.get('field_object_list')).to be_a(list_field) + + field_object_list = fields.get_list_field('field_object_list') + expect(field_object_list).to be_a(list_field) + + expect(field_object_list.items[0]).to be_a(object_field) + expect(field_object_list.object_items[0]).to be_a(object_field) + field_object_list.object_items.each do |entry| + expect(entry).to be_a(object_field) + expect(entry.fields).not_to be_nil + end + end + end + + describe 'raw_text' do + it 'exposes raw texts' do + response = load_v2_extraction_inference(raw_text_json_path) + + active_options = response.inference.active_options + expect(active_options).not_to be_nil + expect(active_options.raw_text).to eq(true) + + raw_text = response.inference.result.raw_text + expect(raw_text).not_to be_nil + expect(raw_text).to be_a(Mindee::Parsing::V2::RawText) + + expect(raw_text.to_s).to eq(File.read(raw_text_str_path, encoding: 'UTF-8')) + + expect(raw_text.pages.length).to eq(2) + first = raw_text.pages.first + expect(first).to be_a(Mindee::Parsing::V2::RawTextPage) + expect(first.content).to eq('This is the raw text of the first page...') + + raw_text.pages.each do |page| + expect(page.content).to be_a(String) + end + end + end + + describe 'rst display' do + it 'is properly exposed' 
do + response = load_v2_extraction_inference(standard_field_path) + rst_string = File.read(standard_field_rst_path, encoding: 'UTF-8') + + expect(response.inference).not_to be_nil + expect(response.inference.to_s).to eq(rst_string) + end + end + + describe 'field locations and confidence' do + it 'are properly exposed' do + response = load_v2_extraction_inference(location_field_path) + + expect(response.inference).not_to be_nil + + date_field = response.inference.result.fields.get_simple_field('date') + expect(date_field).to be_a(simple_field) + expect(date_field.locations).to be_an(Array) + expect(date_field.locations[0]).not_to be_nil + expect(date_field.locations[0].page).to eq(0) + + polygon = date_field.locations[0].polygon + expect(polygon[0]).to be_a(Mindee::Geometry::Point) + + expect(polygon[0].x).to eq(0.948979073166918) + expect(polygon[0].y).to eq(0.23097924535067715) + + expect(polygon[1][0]).to eq(0.85422) + expect(polygon[1][1]).to eq(0.230072) + + expect(polygon[2][0]).to eq(0.8540899268330819) + expect(polygon[2][1]).to eq(0.24365775464932288) + + expect(polygon[3][0]).to eq(0.948849) + expect(polygon[3][1]).to eq(0.244565) + + centroid = polygon.centroid + expect(centroid.x).to eq(0.9015345) + expect(centroid.y).to eq(0.23731850000000002) + + confidence = date_field.confidence + expect(confidence).to be_a(field_confidence) + # equality + expect(confidence).to eq(field_confidence::MEDIUM) + expect(confidence).to eq('Medium') + expect(confidence).to eq(2) + # less than or equal + expect(confidence).to be_lteql(field_confidence::HIGH) + expect(confidence).to be_lteql('High') + expect(confidence).to be_lteql(3) + # greater than or equal + expect(confidence).to be_gteql(field_confidence::LOW) + expect(confidence).to be_gteql('Low') + expect(confidence).to be_gteql(1) + end + end + describe 'RAG Metadata' do + it 'when matched' do + response = load_v2_extraction_inference(rag_matched_path) + expect(response.inference).not_to be_nil + 
expect(response.inference.result.rag.retrieved_document_id).to eq('12345abc-1234-1234-1234-123456789abc') + end + + it 'when not matched' do + response = load_v2_extraction_inference(rag_not_matched_path) + expect(response.inference).not_to be_nil + expect(response.inference.result.rag.retrieved_document_id).to be_nil + end + end + + describe 'text context' do + it 'when enabled' do + response = load_v2_extraction_inference(text_context_path) + expect(response.inference).not_to be_nil + expect(response.inference.active_options.text_context).to be_truthy + end + end +end diff --git a/spec/v2/product/ocr/ocr_spec.rb b/spec/v2/product/ocr/ocr_spec.rb index 8f31560d..039a0585 100644 --- a/spec/v2/product/ocr/ocr_spec.rb +++ b/spec/v2/product/ocr/ocr_spec.rb @@ -3,7 +3,7 @@ require 'json' require 'mindee' -describe Mindee::V2::Product::Ocr::OcrResponse, :v2 do +describe Mindee::V2::Product::Ocr::Ocr, :v2 do let(:ocr_data_dir) { File.join(V2_PRODUCT_DATA_DIR, 'ocr') } it 'parses a single page OCR response properly' do From 7acd933256af60e6af7d1a00116aa843c2b19cad Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Fri, 27 Feb 2026 16:50:03 +0100 Subject: [PATCH 23/23] fix code samples --- docs/code_samples/v2_extraction.txt | 11 ++++++----- docs/code_samples/v2_extraction_webhook.txt | 13 +++++++------ 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/docs/code_samples/v2_extraction.txt b/docs/code_samples/v2_extraction.txt index 50173982..3c9d57e2 100644 --- a/docs/code_samples/v2_extraction.txt +++ b/docs/code_samples/v2_extraction.txt @@ -1,6 +1,7 @@ # frozen_string_literal: true require 'mindee' +require 'mindee/v2/product' input_path = '/path/to/the/file.ext' api_key = 'MY_API_KEY' @@ -10,9 +11,9 @@ model_id = 'MY_MODEL_ID' mindee_client = Mindee::ClientV2.new(api_key: api_key) # Set inference parameters -inference_params = Mindee::Input::InferenceParameters.new( +inference_params = { # ID of the 
model, required. - model_id, + model_id: model_id, # Options: set to `true` or `false` to override defaults @@ -25,16 +26,16 @@ inference_params = Mindee::Input::InferenceParameters.new( # Boost the precision and accuracy of all extractions. # Calculate confidence scores for all fields. confidence: nil -) +} # Load a file from disk input_source = Mindee::Input::Source::PathInputSource.new(input_path) # Send for processing response = mindee_client.enqueue_and_get_result( - Mindee::Parsing::V2::Inference, + Mindee::V2::Product::Extraction::Extraction, input_source, - inference_params # This parameter can also be provided as a Hash. + inference_params ) # Print a brief summary of the parsed data diff --git a/docs/code_samples/v2_extraction_webhook.txt b/docs/code_samples/v2_extraction_webhook.txt index 3cc6b2fb..a68f31eb 100644 --- a/docs/code_samples/v2_extraction_webhook.txt +++ b/docs/code_samples/v2_extraction_webhook.txt @@ -1,6 +1,7 @@ # frozen_string_literal: true require 'mindee' +require 'mindee/v2/product' input_path = '/path/to/the/file.ext' api_key = 'MY_API_KEY' @@ -9,12 +10,12 @@ model_id = 'MY_MODEL_ID' # Init a new client mindee_client = Mindee::ClientV2.new(api_key: api_key) -# Set inference parameters -inference_params = Mindee::Input::InferenceParameters.new( +inference_params = { # ID of the model, required. - model_id, + model_id: model_id, # Add any number of webhook IDs here. webhook_ids: ['MY_WEBHOOK_ID'], + # Options: set to `true` or `false` to override defaults # Enhance extraction accuracy with Retrieval-Augmented Generation. @@ -26,16 +27,16 @@ inference_params = Mindee::Input::InferenceParameters.new( # Boost the precision and accuracy of all extractions. # Calculate confidence scores for all fields. 
confidence: nil -) +} # Load a file from disk input_source = Mindee::Input::Source::PathInputSource.new(input_path) # Send for processing response = mindee_client.enqueue( - Mindee::Parsing::V2::Inference, + Mindee::V2::Product::Extraction::Extraction, input_source, - inference_params # This parameter can also be provided as a Hash. + inference_params ) # Print the job ID