Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 23 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,15 +1,25 @@
cmake_minimum_required(VERSION 3.28)

project(vision.cpp VERSION 0.3.0 LANGUAGES CXX)
project(vision.cpp VERSION 0.3.1 LANGUAGES CXX)

# User-configurable build switches. All default to OFF except BUILD_SHARED_LIBS
# (ON) and VISP_TESTS, which follows PROJECT_IS_TOP_LEVEL so that tests are
# built by default only when this project is the top-level one.
option(BUILD_SHARED_LIBS "Build shared libraries instead of static libraries" ON)
option(VISP_VULKAN "Enable Vulkan support" OFF)
option(VISP_DEV "Enable development mode" OFF)
option(VISP_CI "Enable for continuous integration environment" OFF)
option(VISP_TESTS "Build tests" ${PROJECT_IS_TOP_LEVEL})
option(VISP_INSTALL_MODELS "Download and install default models" OFF)
option(VISP_FMT_LIB "Use external fmt library instead of standard C++ <format>" OFF)
option(VISP_STATIC_GGML "Force static build of ggml" OFF)
option(VISP_ASAN "Enable AddressSanitizer" OFF)

# Install destinations. Defaults are taken from GNUInstallDirs; each one is a
# cache PATH entry, so it can be overridden at configure time.
include(GNUInstallDirs)
set(VISP_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files")
set(VISP_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
set(VISP_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")
# The model destination is only defined when default models are installed.
if(VISP_INSTALL_MODELS)
set(VISP_MODEL_INSTALL_DIR "${CMAKE_INSTALL_DATAROOTDIR}/visioncpp" CACHE PATH "Directory to install default models to")
endif()

if(PROJECT_IS_TOP_LEVEL)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
Expand Down Expand Up @@ -94,6 +104,12 @@ if(VISP_FMT_LIB)
list(APPEND VISP_DEFINITIONS VISP_FMT_LIB)
endif()

# Remember the requested BUILD_SHARED_LIBS value; it is restored from
# BUILD_SHARED_LIBS_SAVED after add_subdirectory(depend/llama/ggml).
set(BUILD_SHARED_LIBS_SAVED ${BUILD_SHARED_LIBS})
if(VISP_STATIC_GGML)
# Override the library type while VISP_STATIC_GGML is requested.
set(BUILD_SHARED_LIBS OFF)
# Presumably needed so static ggml objects can be linked into a shared
# visioncpp library -- TODO confirm.
# NOTE(review): unlike BUILD_SHARED_LIBS, this value is not restored in the
# visible code, so it also applies to targets defined later.
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(GGML_STATIC ON)
endif()
set(GGML_VULKAN ${VISP_VULKAN})
set(GGML_LLAMAFILE ON)
if(VISP_CI)
Expand All @@ -110,6 +126,7 @@ if(VISP_CI)
endif()
endif()
add_subdirectory(depend/llama/ggml)
set(BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS_SAVED})

# Vision.cpp libraries and executables

Expand All @@ -121,15 +138,13 @@ add_subdirectory(src/cli)
if(VISP_TESTS)
enable_testing()
add_subdirectory(tests)
endif()
if(VISP_TESTS OR VISP_INSTALL_MODELS)
add_subdirectory(models)
endif()

# Installation

set(VISP_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files")
set(VISP_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
set(VISP_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")

install(TARGETS visioncpp
RUNTIME DESTINATION ${VISP_BIN_INSTALL_DIR}
LIBRARY DESTINATION ${VISP_LIB_INSTALL_DIR}
Expand All @@ -138,6 +153,9 @@ install(DIRECTORY include/ DESTINATION ${VISP_INCLUDE_INSTALL_DIR})
if(PROJECT_IS_TOP_LEVEL)
install(FILES README.md LICENSE DESTINATION .)
endif()
# Install the files listed in VISP_DEFAULT_MODELS into VISP_MODEL_INSTALL_DIR.
# VISP_DEFAULT_MODELS is exported to the parent scope by models/CMakeLists.txt.
if(VISP_INSTALL_MODELS)
install(FILES ${VISP_DEFAULT_MODELS} DESTINATION ${VISP_MODEL_INSTALL_DIR})
endif()

install(TARGETS vision-cli RUNTIME DESTINATION ${VISP_BIN_INSTALL_DIR})

Expand Down
62 changes: 34 additions & 28 deletions models/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,37 +1,43 @@
# Download models used in tests (happens only if VISP_TESTS is enabled)
# Download models used in tests or for installation

message(STATUS "Checking for models/MobileSAM-F16.gguf")
file(DOWNLOAD
# download_model(<url> <filename> <hash>)
#
# Downloads <url> to <filename> next to this CMakeLists.txt, verified against
# <hash> (in file(DOWNLOAD) EXPECTED_HASH form, e.g. "SHA256=<hex digest>"),
# and appends the absolute path of the file to DEFAULT_MODELS in the caller's
# scope.
#
# Example:
#   download_model("https://example.com/model.gguf" "model.gguf" "SHA256=<hex digest>")
function(download_model url filename hash)
  # The file is stored in the source tree next to this list file.
  set(model_path "${CMAKE_CURRENT_LIST_DIR}/${filename}")

  message(STATUS "Checking for ${filename}")
  file(DOWNLOAD
    "${url}"
    "${model_path}"
    EXPECTED_HASH "${hash}"
    SHOW_PROGRESS
  )

  # list(APPEND) instead of string concatenation: the first call must not
  # leave an empty leading element in the list. DEFAULT_MODELS starts out as
  # the caller's value because function scopes inherit the caller's variables.
  list(APPEND DEFAULT_MODELS "${model_path}")
  set(DEFAULT_MODELS "${DEFAULT_MODELS}" PARENT_SCOPE)
endfunction()


download_model(
"https://huggingface.co/Acly/MobileSAM-GGUF/resolve/main/MobileSAM-F16.gguf"
${CMAKE_CURRENT_LIST_DIR}/MobileSAM-F16.gguf
EXPECTED_HASH "SHA256=b546366475e3ad744bb2eaf7634df88e9aaf25f6622797d2de300f5a530831f7"
SHOW_PROGRESS
"MobileSAM-F16.gguf"
"SHA256=b546366475e3ad744bb2eaf7634df88e9aaf25f6622797d2de300f5a530831f7"
)
message(STATUS "Checking for models/BiRefNet-lite-F16.gguf")
file(DOWNLOAD
download_model(
"https://huggingface.co/Acly/BiRefNet-GGUF/resolve/main/BiRefNet-lite-F16.gguf"
${CMAKE_CURRENT_LIST_DIR}/BiRefNet-lite-F16.gguf
EXPECTED_HASH "SHA256=7b5397a2c98d66677f8f74317774bbeac49dbb321b8a3dc744af913db71d4fa5"
SHOW_PROGRESS
"BiRefNet-lite-F16.gguf"
"SHA256=7b5397a2c98d66677f8f74317774bbeac49dbb321b8a3dc744af913db71d4fa5"
)
message(STATUS "Checking for models/Depth-Anything-V2-Small-F16.gguf")
file(DOWNLOAD
download_model(
"https://huggingface.co/Acly/Depth-Anything-V2-GGUF/resolve/main/Depth-Anything-V2-Small-F16.gguf"
${CMAKE_CURRENT_LIST_DIR}/Depth-Anything-V2-Small-F16.gguf
EXPECTED_HASH "SHA256=0f83332d6a8b4375cd7fdcc168f3e3636f474f8e84b0959e903f513aace782f5"
SHOW_PROGRESS
"Depth-Anything-V2-Small-F16.gguf"
"SHA256=0f83332d6a8b4375cd7fdcc168f3e3636f474f8e84b0959e903f513aace782f5"
)
message(STATUS "Checking for models/MIGAN-512-places2-F16.gguf")
file(DOWNLOAD
download_model(
"https://huggingface.co/Acly/MIGAN-GGUF/resolve/main/MIGAN-512-places2-F16.gguf"
${CMAKE_CURRENT_LIST_DIR}/MIGAN-512-places2-F16.gguf
EXPECTED_HASH "SHA256=3e47592bf716d0dc306f8dc02d4476cfcdaf2c055fa3c3c8e0ced4db775eb64b"
SHOW_PROGRESS
"MIGAN-512-places2-F16.gguf"
"SHA256=3e47592bf716d0dc306f8dc02d4476cfcdaf2c055fa3c3c8e0ced4db775eb64b"
)
message(STATUS "Checking for models/RealESRGAN-x4plus_anime-6B-F16.gguf")
file(DOWNLOAD
"https://huggingface.co/Acly/Real-ESRGAN-GGUF/resolve/main/RealESRGAN-x4plus_anime-6B-F16.gguf"
${CMAKE_CURRENT_LIST_DIR}/RealESRGAN-x4plus_anime-6B-F16.gguf
EXPECTED_HASH "SHA256=730469c5a2269cdef96d0d58aacf87bcf25d7a0d92256685808e6cdce0675c09"
SHOW_PROGRESS
)
# This model is only downloaded when tests are enabled.
if(VISP_TESTS)
download_model(
"https://huggingface.co/Acly/Real-ESRGAN-GGUF/resolve/main/RealESRGAN-x4plus_anime-6B-F16.gguf"
"RealESRGAN-x4plus_anime-6B-F16.gguf"
"SHA256=730469c5a2269cdef96d0d58aacf87bcf25d7a0d92256685808e6cdce0675c09"
)
endif()

# Export the model paths collected by download_model() to the parent scope
# under the name VISP_DEFAULT_MODELS.
set(VISP_DEFAULT_MODELS "${DEFAULT_MODELS}" PARENT_SCOPE)
11 changes: 10 additions & 1 deletion src/cli/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,13 @@ target_include_directories(vision-cli PRIVATE ..)
target_compile_definitions(vision-cli PRIVATE ${VISP_ASSERT} ${VISP_DEFINITIONS})
target_compile_options(vision-cli PRIVATE ${VISP_WARNINGS} ${VISP_COMP_OPTIONS})
target_link_options(vision-cli PRIVATE ${VISP_LINK_OPTIONS})
target_link_libraries(vision-cli PRIVATE ggml visioncpp ${VISP_FMT_LINK})
target_link_libraries(vision-cli PRIVATE ggml visioncpp ${VISP_FMT_LINK})

# Compile the absolute model install directory into vision-cli as the string
# macro VISP_MODEL_INSTALL_DIR. The macro is an empty string when models are
# not installed.
#
# MODEL_DIR is reset first so that a value inherited from an enclosing scope
# or from the cache cannot end up in the binary when VISP_INSTALL_MODELS is OFF.
set(MODEL_DIR "")
if(VISP_INSTALL_MODELS)
  if(IS_ABSOLUTE "${VISP_MODEL_INSTALL_DIR}")
    set(MODEL_DIR "${VISP_MODEL_INSTALL_DIR}")
  else()
    # A relative install directory is resolved against the install prefix.
    set(MODEL_DIR "${CMAKE_INSTALL_PREFIX}/${VISP_MODEL_INSTALL_DIR}")
  endif()
endif()
target_compile_definitions(vision-cli PRIVATE VISP_MODEL_INSTALL_DIR="${MODEL_DIR}")
55 changes: 46 additions & 9 deletions src/cli/cli.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ cli_args cli_parse(int argc, char** argv) {
} else if (arg == "-o" || arg == "--output") {
r.output = next_arg(argc, argv, i);
} else if (arg == "-m" || arg == "--model") {
r.model = validate_path(next_arg(argc, argv, i));
r.model = next_arg(argc, argv, i);
} else if (arg == "-p" || arg == "--prompt") {
r.prompt = collect_args(argc, argv, i, '-');
} else if (arg == "-b" || arg == "--backend") {
Expand Down Expand Up @@ -245,6 +245,42 @@ char const* to_string(tensor_data_layout l) {
}
}

// Resolves a model name or path to a model file.
//
// `model_name_or_path` is returned unchanged when it already exists as given
// or is an absolute path. Otherwise it is appended to each of the following
// directories, in order, and the first candidate that exists is returned:
//   1. models                         (relative to the working directory)
//   2. $VISION_MODEL_DIR
//   3. $XDG_DATA_HOME/visioncpp
//   4. $HOME/.local/share/visioncpp
//   5. VISP_MODEL_INSTALL_DIR         (compile-time macro, skipped when empty)
//
// When nothing is found, every location that was tried is printed and the
// error built by `except(...)` is thrown.
path find_model(char const* model_name_or_path) {
    path p = path(model_name_or_path);
    if (exists(p) || p.is_absolute()) {
        return p;
    }

    // A variable that is set but empty is treated as unset. Otherwise
    // `path("") / "visioncpp"` would silently become a directory relative to
    // the current working directory.
    auto env_dir = [](char const* name) -> char const* {
        char const* value = getenv(name);
        return (value != nullptr && value[0] != '\0') ? value : nullptr;
    };

    // Slots left empty are skipped below.
    path search_paths[5];
    search_paths[0] = path("models");
    if (char const* vision_model_dir = env_dir("VISION_MODEL_DIR")) {
        search_paths[1] = path(vision_model_dir);
    }
    if (char const* xdg_data_home = env_dir("XDG_DATA_HOME")) {
        search_paths[2] = path(xdg_data_home) / "visioncpp";
    }
    if (char const* home = env_dir("HOME")) {
        search_paths[3] = path(home) / ".local/share/visioncpp";
    }
    if constexpr (VISP_MODEL_INSTALL_DIR[0] != '\0') {
        search_paths[4] = path(VISP_MODEL_INSTALL_DIR);
    }

    for (auto& sp : search_paths) {
        if (sp.empty()) {
            continue;
        }
        path candidate = sp / p;
        if (exists(candidate)) {
            return candidate;
        }
    }

    // Nothing matched: list every location that was checked, then fail.
    printf("Looking for %s\n", p.generic_string().c_str());
    for (auto& sp : search_paths) {
        if (!sp.empty()) {
            printf("Looking for %s\n", (sp / p).generic_string().c_str());
        }
    }
    throw except("Model file not found: {}", model_name_or_path);
}

std::tuple<model_file, model_weights> load_model_weights(
cli_args const& args,
backend_device const& dev,
Expand All @@ -253,10 +289,11 @@ std::tuple<model_file, model_weights> load_model_weights(
tensor_data_layout preferred_layout = tensor_data_layout::unknown) {

timer t;
char const* model_path = args.model ? args.model : default_model;
printf("Loading model weights from '%s'... ", model_path);
path model_path = find_model(args.model ? args.model : default_model);
auto model_path_str = model_path.generic_string();
printf("Loading model weights from '%s'... ", model_path_str.c_str());

model_file file = model_load(model_path);
model_file file = model_load(model_path_str.c_str());
model_weights weights = model_init(file.n_tensors() + n_tensors);
if (preferred_layout == tensor_data_layout::unknown) {
preferred_layout = file.tensor_layout();
Expand Down Expand Up @@ -355,7 +392,7 @@ sam_prompt sam_parse_prompt(std::span<char const* const> args, i32x2 extent) {
void run_sam(cli_args const& args) {
backend_device backend = backend_init(args);
auto [file, weights] = load_model_weights(
args, backend, "models/MobileSAM-F16.gguf", 0, backend.preferred_layout());
args, backend, "MobileSAM-F16.gguf", 0, backend.preferred_layout());
sam_params params{};

require_inputs(args.inputs, 1, "<image>");
Expand Down Expand Up @@ -409,7 +446,7 @@ void run_sam(cli_args const& args) {
void run_birefnet(cli_args const& args) {
backend_device backend = backend_init(args);
auto [file, weights] = load_model_weights(
args, backend, "models/BiRefNet-F16.gguf", 0, backend.preferred_layout());
args, backend, "BiRefNet-lite-F16.gguf", 0, backend.preferred_layout());

require_inputs(args.inputs, 1, "<image>");
image_data image = image_load(args.inputs[0]);
Expand Down Expand Up @@ -453,7 +490,7 @@ void run_birefnet(cli_args const& args) {
void run_depth_anything(cli_args const& args) {
backend_device backend = backend_init(args);
auto [file, weights] = load_model_weights(
args, backend, "models/DepthAnythingV2-Small-F32.gguf", 0, backend.preferred_layout());
args, backend, "DepthAnythingV2-Small-F32.gguf", 0, backend.preferred_layout());

require_inputs(args.inputs, 1, "<image>");
image_data image = image_load(args.inputs[0]);
Expand Down Expand Up @@ -489,7 +526,7 @@ void run_depth_anything(cli_args const& args) {
void run_migan(cli_args const& args) {
backend_device backend = backend_init(args);
auto [file, weights] = load_model_weights(
args, backend, "models/MIGAN-512-places2-F16.gguf", backend.preferred_layout());
args, backend, "MIGAN-512-places2-F16.gguf", backend.preferred_layout());
migan_params params = migan_detect_params(file);
params.invert_mask = true; // -> inpaint opaque areas

Expand Down Expand Up @@ -527,7 +564,7 @@ void run_migan(cli_args const& args) {
void run_esrgan(cli_args const& args) {
backend_device backend = backend_init(args);
auto [file, weights] = load_model_weights(
args, backend, "models/RealESRGAN-x4.gguf", 0, backend.preferred_layout());
args, backend, "RealESRGAN-x4.gguf", 0, backend.preferred_layout());
esrgan_params params = esrgan_detect_params(file);
printf("- scale: %dx\n", params.scale);
printf("- block count: %d\n", params.n_blocks);
Expand Down