diff --git a/CMakeLists.txt b/CMakeLists.txt index 4c1d75b..5a67058 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,15 +1,25 @@ cmake_minimum_required(VERSION 3.28) -project(vision.cpp VERSION 0.3.0 LANGUAGES CXX) +project(vision.cpp VERSION 0.3.1 LANGUAGES CXX) option(BUILD_SHARED_LIBS "Build shared libraries instead of static libraries" ON) option(VISP_VULKAN "Enable Vulkan support" OFF) option(VISP_DEV "Enable development mode" OFF) option(VISP_CI "Enable for continuous integration environment" OFF) option(VISP_TESTS "Build tests" ${PROJECT_IS_TOP_LEVEL}) +option(VISP_INSTALL_MODELS "Download and install default models" OFF) option(VISP_FMT_LIB "Use external fmt library instead of standard C++ " OFF) +option(VISP_STATIC_GGML "Force static build of ggml" OFF) option(VISP_ASAN "Enable AddressSanitizer" OFF) +include(GNUInstallDirs) +set(VISP_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files") +set(VISP_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files") +set(VISP_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files") +if(VISP_INSTALL_MODELS) + set(VISP_MODEL_INSTALL_DIR "${CMAKE_INSTALL_DATAROOTDIR}/visioncpp" CACHE PATH "Directory to install default models to") +endif() + if(PROJECT_IS_TOP_LEVEL) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) @@ -94,6 +104,12 @@ if(VISP_FMT_LIB) list(APPEND VISP_DEFINITIONS VISP_FMT_LIB) endif() +set(BUILD_SHARED_LIBS_SAVED ${BUILD_SHARED_LIBS}) +if(VISP_STATIC_GGML) + set(BUILD_SHARED_LIBS OFF) + set(CMAKE_POSITION_INDEPENDENT_CODE ON) + set(GGML_STATIC ON) +endif() set(GGML_VULKAN ${VISP_VULKAN}) set(GGML_LLAMAFILE ON) if(VISP_CI) @@ -110,6 +126,7 @@ if(VISP_CI) endif() endif() add_subdirectory(depend/llama/ggml) +set(BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS_SAVED}) # Vision.cpp libraries and executables @@ -121,15 +138,13 @@ add_subdirectory(src/cli) if(VISP_TESTS) enable_testing() 
add_subdirectory(tests) +endif() +if(VISP_TESTS OR VISP_INSTALL_MODELS) add_subdirectory(models) endif() # Installation -set(VISP_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files") -set(VISP_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files") -set(VISP_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files") - install(TARGETS visioncpp RUNTIME DESTINATION ${VISP_BIN_INSTALL_DIR} LIBRARY DESTINATION ${VISP_LIB_INSTALL_DIR} @@ -138,6 +153,9 @@ install(DIRECTORY include/ DESTINATION ${VISP_INCLUDE_INSTALL_DIR}) if(PROJECT_IS_TOP_LEVEL) install(FILES README.md LICENSE DESTINATION .) endif() +if(VISP_INSTALL_MODELS) + install(FILES ${VISP_DEFAULT_MODELS} DESTINATION ${VISP_MODEL_INSTALL_DIR}) +endif() install(TARGETS vision-cli RUNTIME DESTINATION ${VISP_BIN_INSTALL_DIR}) diff --git a/models/CMakeLists.txt b/models/CMakeLists.txt index a5ad052..77a3c53 100644 --- a/models/CMakeLists.txt +++ b/models/CMakeLists.txt @@ -1,37 +1,43 @@ -# Download models used in tests (happens only if VISP_TESTS is enabled) +# Download models used in tests or for installation -message(STATUS "Checking for models/MobileSAM-F16.gguf") -file(DOWNLOAD +function(download_model url filename hash) + message(STATUS "Checking for ${filename}") + file(DOWNLOAD + ${url} + ${CMAKE_CURRENT_LIST_DIR}/${filename} + EXPECTED_HASH ${hash} + SHOW_PROGRESS + ) + set(DEFAULT_MODELS "${DEFAULT_MODELS};${CMAKE_CURRENT_LIST_DIR}/${filename}" PARENT_SCOPE) +endfunction() + + +download_model( "https://huggingface.co/Acly/MobileSAM-GGUF/resolve/main/MobileSAM-F16.gguf" - ${CMAKE_CURRENT_LIST_DIR}/MobileSAM-F16.gguf - EXPECTED_HASH "SHA256=b546366475e3ad744bb2eaf7634df88e9aaf25f6622797d2de300f5a530831f7" - SHOW_PROGRESS + "MobileSAM-F16.gguf" + "SHA256=b546366475e3ad744bb2eaf7634df88e9aaf25f6622797d2de300f5a530831f7" ) -message(STATUS "Checking for models/BiRefNet-lite-F16.gguf") -file(DOWNLOAD
"https://huggingface.co/Acly/BiRefNet-GGUF/resolve/main/BiRefNet-lite-F16.gguf" - ${CMAKE_CURRENT_LIST_DIR}/BiRefNet-lite-F16.gguf - EXPECTED_HASH "SHA256=7b5397a2c98d66677f8f74317774bbeac49dbb321b8a3dc744af913db71d4fa5" - SHOW_PROGRESS + "BiRefNet-lite-F16.gguf" + "SHA256=7b5397a2c98d66677f8f74317774bbeac49dbb321b8a3dc744af913db71d4fa5" ) -message(STATUS "Checking for models/Depth-Anything-V2-Small-F16.gguf") -file(DOWNLOAD +download_model( "https://huggingface.co/Acly/Depth-Anything-V2-GGUF/resolve/main/Depth-Anything-V2-Small-F16.gguf" - ${CMAKE_CURRENT_LIST_DIR}/Depth-Anything-V2-Small-F16.gguf - EXPECTED_HASH "SHA256=0f83332d6a8b4375cd7fdcc168f3e3636f474f8e84b0959e903f513aace782f5" - SHOW_PROGRESS + "Depth-Anything-V2-Small-F16.gguf" + "SHA256=0f83332d6a8b4375cd7fdcc168f3e3636f474f8e84b0959e903f513aace782f5" ) -message(STATUS "Checking for models/MIGAN-512-places2-F16.gguf") -file(DOWNLOAD +download_model( "https://huggingface.co/Acly/MIGAN-GGUF/resolve/main/MIGAN-512-places2-F16.gguf" - ${CMAKE_CURRENT_LIST_DIR}/MIGAN-512-places2-F16.gguf - EXPECTED_HASH "SHA256=3e47592bf716d0dc306f8dc02d4476cfcdaf2c055fa3c3c8e0ced4db775eb64b" - SHOW_PROGRESS + "MIGAN-512-places2-F16.gguf" + "SHA256=3e47592bf716d0dc306f8dc02d4476cfcdaf2c055fa3c3c8e0ced4db775eb64b" ) -message(STATUS "Checking for models/RealESRGAN-x4plus_anime-6B-F16.gguf") -file(DOWNLOAD - "https://huggingface.co/Acly/Real-ESRGAN-GGUF/resolve/main/RealESRGAN-x4plus_anime-6B-F16.gguf" - ${CMAKE_CURRENT_LIST_DIR}/RealESRGAN-x4plus_anime-6B-F16.gguf - EXPECTED_HASH "SHA256=730469c5a2269cdef96d0d58aacf87bcf25d7a0d92256685808e6cdce0675c09" - SHOW_PROGRESS -) \ No newline at end of file +if(VISP_TESTS) + download_model( + "https://huggingface.co/Acly/Real-ESRGAN-GGUF/resolve/main/RealESRGAN-x4plus_anime-6B-F16.gguf" + "RealESRGAN-x4plus_anime-6B-F16.gguf" + "SHA256=730469c5a2269cdef96d0d58aacf87bcf25d7a0d92256685808e6cdce0675c09" + ) +endif() + +set(VISP_DEFAULT_MODELS "${DEFAULT_MODELS}" PARENT_SCOPE) diff --git 
a/src/cli/CMakeLists.txt b/src/cli/CMakeLists.txt index c5127aa..d139103 100644 --- a/src/cli/CMakeLists.txt +++ b/src/cli/CMakeLists.txt @@ -4,4 +4,13 @@ target_include_directories(vision-cli PRIVATE ..) target_compile_definitions(vision-cli PRIVATE ${VISP_ASSERT} ${VISP_DEFINITIONS}) target_compile_options(vision-cli PRIVATE ${VISP_WARNINGS} ${VISP_COMP_OPTIONS}) target_link_options(vision-cli PRIVATE ${VISP_LINK_OPTIONS}) -target_link_libraries(vision-cli PRIVATE ggml visioncpp ${VISP_FMT_LINK}) \ No newline at end of file +target_link_libraries(vision-cli PRIVATE ggml visioncpp ${VISP_FMT_LINK}) + +if(VISP_INSTALL_MODELS) + if(IS_ABSOLUTE "${VISP_MODEL_INSTALL_DIR}") + set(MODEL_DIR "${VISP_MODEL_INSTALL_DIR}") + else() + set(MODEL_DIR "${CMAKE_INSTALL_PREFIX}/${VISP_MODEL_INSTALL_DIR}") + endif() +endif() +target_compile_definitions(vision-cli PRIVATE VISP_MODEL_INSTALL_DIR="${MODEL_DIR}") diff --git a/src/cli/cli.cpp b/src/cli/cli.cpp index 81360de..6715d74 100644 --- a/src/cli/cli.cpp +++ b/src/cli/cli.cpp @@ -140,7 +140,7 @@ cli_args cli_parse(int argc, char** argv) { } else if (arg == "-o" || arg == "--output") { r.output = next_arg(argc, argv, i); } else if (arg == "-m" || arg == "--model") { - r.model = validate_path(next_arg(argc, argv, i)); + r.model = next_arg(argc, argv, i); } else if (arg == "-p" || arg == "--prompt") { r.prompt = collect_args(argc, argv, i, '-'); } else if (arg == "-b" || arg == "--backend") { @@ -245,6 +245,42 @@ char const* to_string(tensor_data_layout l) { } } +path find_model(char const* model_name_or_path) { + path p = path(model_name_or_path); + if (exists(p) || p.is_absolute()) { + return p; + } + path search_paths[5]; + search_paths[0] = path("models"); + if (char const* vision_model_dir = getenv("VISION_MODEL_DIR")) { + search_paths[1] = path(vision_model_dir); + } + if (char const* xdg_data_home = getenv("XDG_DATA_HOME")) { + search_paths[2] = path(xdg_data_home) / "visioncpp"; + } + if (char const* home = getenv("HOME")) 
{ + search_paths[3] = path(home) / ".local/share/visioncpp"; + } + if constexpr (VISP_MODEL_INSTALL_DIR[0] != '\0') { + search_paths[4] = path(VISP_MODEL_INSTALL_DIR); + } + for (auto& sp : search_paths) { + if (!sp.empty()) { + path candidate = sp / p; + if (exists(candidate)) { + return candidate; + } + } + } + printf("Looking for %s\n", p.generic_string().c_str()); + for (auto& sp : search_paths) { + if (!sp.empty()) { + printf("Looking for %s\n", (sp / p).generic_string().c_str()); + } + } + throw except("Model file not found: {}", model_name_or_path); +} + std::tuple load_model_weights( cli_args const& args, backend_device const& dev, @@ -253,10 +289,11 @@ std::tuple load_model_weights( tensor_data_layout preferred_layout = tensor_data_layout::unknown) { timer t; - char const* model_path = args.model ? args.model : default_model; - printf("Loading model weights from '%s'... ", model_path); + path model_path = find_model(args.model ? args.model : default_model); + auto model_path_str = model_path.generic_string(); + printf("Loading model weights from '%s'... 
", model_path_str.c_str()); - model_file file = model_load(model_path); + model_file file = model_load(model_path_str.c_str()); model_weights weights = model_init(file.n_tensors() + n_tensors); if (preferred_layout == tensor_data_layout::unknown) { preferred_layout = file.tensor_layout(); @@ -355,7 +392,7 @@ sam_prompt sam_parse_prompt(std::span args, i32x2 extent) { void run_sam(cli_args const& args) { backend_device backend = backend_init(args); auto [file, weights] = load_model_weights( - args, backend, "models/MobileSAM-F16.gguf", 0, backend.preferred_layout()); + args, backend, "MobileSAM-F16.gguf", 0, backend.preferred_layout()); sam_params params{}; require_inputs(args.inputs, 1, ""); @@ -409,7 +446,7 @@ void run_sam(cli_args const& args) { void run_birefnet(cli_args const& args) { backend_device backend = backend_init(args); auto [file, weights] = load_model_weights( - args, backend, "models/BiRefNet-F16.gguf", 0, backend.preferred_layout()); + args, backend, "BiRefNet-lite-F16.gguf", 0, backend.preferred_layout()); require_inputs(args.inputs, 1, ""); image_data image = image_load(args.inputs[0]); @@ -453,7 +490,7 @@ void run_birefnet(cli_args const& args) { void run_depth_anything(cli_args const& args) { backend_device backend = backend_init(args); auto [file, weights] = load_model_weights( - args, backend, "models/DepthAnythingV2-Small-F32.gguf", 0, backend.preferred_layout()); + args, backend, "DepthAnythingV2-Small-F32.gguf", 0, backend.preferred_layout()); require_inputs(args.inputs, 1, ""); image_data image = image_load(args.inputs[0]); @@ -489,7 +526,7 @@ void run_depth_anything(cli_args const& args) { void run_migan(cli_args const& args) { backend_device backend = backend_init(args); auto [file, weights] = load_model_weights( - args, backend, "models/MIGAN-512-places2-F16.gguf", backend.preferred_layout()); + args, backend, "MIGAN-512-places2-F16.gguf", backend.preferred_layout()); migan_params params = migan_detect_params(file); params.invert_mask 
= true; // -> inpaint opaque areas @@ -527,7 +564,7 @@ void run_migan(cli_args const& args) { void run_esrgan(cli_args const& args) { backend_device backend = backend_init(args); auto [file, weights] = load_model_weights( - args, backend, "models/RealESRGAN-x4.gguf", 0, backend.preferred_layout()); + args, backend, "RealESRGAN-x4.gguf", 0, backend.preferred_layout()); esrgan_params params = esrgan_detect_params(file); printf("- scale: %dx\n", params.scale); printf("- block count: %d\n", params.n_blocks);