def check_project_permissions(
    project: Project, permission: ProjectPermissions
) -> int | None:
    """Evaluate a permission for the current user without aborting the request.

    Unlike helpers that raise, this one reports the outcome as a value so the
    caller can decide how to present a failure.

    :param project: project the permission is evaluated against
    :type project: Project
    :param permission: permission to check
    :type permission: ProjectPermissions
    :return: None when the check passes, otherwise the HTTP status code
        describing the failure (404 for anonymous users, 403 for logged-in ones)
    :rtype: int | None
    """
    if permission.check(project, current_user):
        return None

    # Anonymous users get 404 so that the existence of a private project is
    # not revealed; an authenticated user without access gets 403.
    return 404 if current_user.is_anonymous else 403
returned as a list of simple project objects and/or error objects. + content: + application/json: + schema: + type: object + required: [projects] + properties: + projects: + type: array + items: + oneOf: + - $ref: "#/components/schemas/Project" + - $ref: "#/components/schemas/BatchItemError" + "400": + description: Batch limit exceeded or one or more UUIDs were invalid + content: + application/problem+json: + schema: + $ref: "#/components/schemas/CustomError" + "401": + $ref: "#/components/responses/Unauthorized" + "404": + $ref: "#/components/responses/NotFound" + x-openapi-router-controller: mergin.sync.public_api_v2_controller + /workspaces/{workspace_id}/projects: get: tags: @@ -457,9 +504,7 @@ components: description: UUID of the project required: true schema: - type: string - format: uuid - pattern: \b[0-9a-f]{8}\b-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-\b[0-9a-f]{12}\b + $ref: "#/components/schemas/ProjectId" WorkspaceId: name: workspace_id in: path @@ -468,6 +513,10 @@ components: schema: type: integer schemas: + ProjectId: + type: string + format: uuid + pattern: \b[0-9a-f]{8}\b-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-\b[0-9a-f]{12}\b # Errors CustomError: type: object @@ -547,6 +596,17 @@ components: example: code: UploadError detail: "Project version could not be created (UploadError)" + BatchItemError: + type: object + properties: + id: + $ref: "#/components/schemas/ProjectId" + error: + type: integer + example: 404 + required: + - id + - error # Data ProjectRole: type: string diff --git a/server/mergin/sync/public_api_v2_controller.py b/server/mergin/sync/public_api_v2_controller.py index 2b7f124e..b42d83ed 100644 --- a/server/mergin/sync/public_api_v2_controller.py +++ b/server/mergin/sync/public_api_v2_controller.py @@ -17,12 +17,13 @@ from mergin.sync.tasks import remove_transaction_chunks -from .schemas_v2 import ProjectSchema as ProjectSchemaV2 +from .schemas_v2 import BatchErrorSchema, ProjectSchema as ProjectSchemaV2 from ..app import db from ..auth 
def list_batch_projects(body):
    """List projects for the given UUIDs, one result entry per unique id.

    Duplicate ids are dropped while the order of first occurrence is kept.
    The number of unique ids is limited by the ``MAX_BATCH_SIZE`` application
    config value (100 when the key is not set); a larger batch is rejected
    with a ``BatchLimitError`` response and HTTP 400.

    Each requested id produces exactly one item in the result list:

    * a serialized project when it was found and the current user passes the
      ``ProjectPermissions.Read`` check,
    * an error item ``{"id": ..., "error": 404}`` when the project handler did
      not return a project for that id,
    * an error item carrying the code returned by
      ``check_project_permissions`` when the read check fails.

    :param body: request payload; the list of project UUIDs is read from its "ids" key
    :type body: dict
    :return: JSON response with a "projects" list and HTTP status 200
    :rtype: Tuple[Response, int]
    """
    # remove duplicates while preserving the order
    ids = list(dict.fromkeys(body.get("ids", [])))
    max_batch = current_app.config.get("MAX_BATCH_SIZE", 100)
    if len(ids) > max_batch:
        return BatchLimitError().response(400)

    projects = current_app.project_handler.get_projects_by_uuids(ids)
    by_id = {str(project.id): project for project in projects}

    # schema instances are reusable across dumps, so build them once instead
    # of once per requested id
    project_schema = ProjectSchemaV2()
    error_schema = BatchErrorSchema()

    results = []
    for project_id in ids:
        project = by_id.get(project_id)
        if project is None:
            # unknown to the handler -> reported the same way for everybody
            status = 404
        else:
            status = check_project_permissions(project, ProjectPermissions.Read)

        if status is None:
            results.append(project_schema.dump(project))
        else:
            results.append(error_schema.dump({"id": project_id, "error": status}))

    return jsonify(projects=results), 200
def test_check_project_permissions(client):
    """Check the codes returned by check_project_permissions for three kinds of users.

    Covers the project owner, a logged-in user without access (global
    permissions switched off) and an anonymous visitor, each against one
    private and one public project.
    """
    owner = User.query.filter_by(username=DEFAULT_USER[0]).first()
    workspace = create_workspace()
    created_private = create_project("batch_private", workspace, owner)
    created_public = create_project("batch_public", workspace, owner)

    # make the second project public and persist the change
    to_publish = Project.query.get(created_public.id)
    to_publish.public = True
    db.session.commit()

    priv_proj = Project.query.get(created_private.id)
    pub_proj = Project.query.get(created_public.id)

    def read_check(project):
        # shorthand for the call under test
        return check_project_permissions(project, ProjectPermissions.Read)

    # owner: may read both projects
    login(client, DEFAULT_USER[0], DEFAULT_USER[1])
    with client:
        # issue a request so that a request context with the logged-in user exists
        client.get("/")
        assert read_check(priv_proj) is None
        assert read_check(pub_proj) is None

    # another user, with every global permission switched off
    global_perms_off = [
        patch.object(Configuration, flag, False)
        for flag in ("GLOBAL_READ", "GLOBAL_WRITE", "GLOBAL_ADMIN")
    ]
    with global_perms_off[0], global_perms_off[1], global_perms_off[2]:
        outsider = add_user("user_batch", "password")
        login(client, outsider.username, "password")

        with client:
            client.get("/")
            assert read_check(pub_proj) is None
            assert read_check(priv_proj) == 403

    # anonymous visitor: private project is reported as not found
    logout(client)
    with client:
        client.get("/")
        assert read_check(priv_proj) == 404
        assert read_check(pub_proj) is None
def test_list_projects_in_batch(client):
    """Test the batch project listing endpoint for validation, permissions and the size limit."""
    owner = User.query.filter_by(username=DEFAULT_USER[0]).first()
    workspace = create_workspace()
    created_private = create_project("batch_private", workspace, owner)
    created_public = create_project("batch_public", workspace, owner)

    # make the second project public and persist the change
    to_publish = Project.query.get(created_public.id)
    to_publish.public = True
    db.session.commit()

    url = "/v2/projects/batch"
    priv_id = str(created_private.id)
    pub_id = str(created_public.id)

    def post_batch(payload):
        # send the given JSON payload to the batch endpoint
        return client.post(url, json=payload)

    def entry_for(items, wanted_id):
        # first item of the response list that carries the given id
        return next(item for item in items if item.get("id") == wanted_id)

    # missing ids -> 400 (connexion validation)
    assert post_batch({}).status_code == 400

    # invalid UUID -> 400
    assert post_batch({"ids": ["invalid-uuid", pub_id]}).status_code == 400

    # returns envelope with projects list
    resp = post_batch({"ids": [priv_id, pub_id]})
    assert resp.status_code == 200
    assert "projects" in resp.json
    listed = resp.json["projects"]
    assert isinstance(listed, list)
    assert len(listed) == 2
    # both projects come back as full objects for the owner
    for item in listed:
        assert "id" in item
        assert "name" in item  # full project object

    # second user with no access to the private project
    outsider = add_user("user_batch", "password")
    login(client, outsider.username, "password")

    with patch.object(Configuration, "GLOBAL_READ", False):
        resp = post_batch({"ids": [pub_id, priv_id]})
        assert resp.status_code == 200
        listed = resp.json["projects"]
        assert len(listed) == 2
        # public -> full object
        assert "name" in entry_for(listed, pub_id)
        # private -> error 403
        assert entry_for(listed, priv_id)["error"] == 403

    # global permission allows any user to list the project
    with patch.object(Configuration, "GLOBAL_READ", True):
        resp = post_batch({"ids": [pub_id, priv_id]})
        assert "name" in entry_for(resp.json["projects"], priv_id)

    # logged-out (anonymous) user - public projects are served, private ones denied
    logout(client)
    resp = post_batch({"ids": [pub_id, priv_id]})
    assert resp.status_code == 200
    listed = resp.json["projects"]
    assert len(listed) == 2
    # public -> full object
    assert "name" in entry_for(listed, pub_id)
    # private -> error 404 (anonymous cannot access private)
    assert entry_for(listed, priv_id)["error"] == 404

    # batch size limit: one more random UUID than allowed must be rejected
    limit = client.application.config.get("MAX_BATCH_SIZE", 100)
    too_many = [str(uuid.uuid4()) for _ in range(limit + 1)]
    resp = post_batch({"ids": too_many})
    assert resp.status_code == 400
    assert resp.json["code"] == "BatchLimitExceeded"
def test_get_schema_fields_map():
    """Check that the schema map keeps plain and aliased fields and leaves out virtual ones."""

    # throw-away schema covering every kind of declaration handled by the helper
    class TestSchema(Schema):
        # plain field -> mapped onto itself
        name = fields.String()
        # aliased field -> mapped onto its 'attribute'
        size = fields.Integer(attribute="disk_usage")
        # virtual fields -> expected to be left out
        version = fields.Function(lambda obj: "v1")
        role = fields.Method("get_role")
        # field excluded by being set to None -> expected to be left out
        hidden_field = None

    assert get_schema_fields_map(TestSchema) == {
        "name": "name",
        "size": "disk_usage",
    }
JSON from sqlalchemy.sql.elements import UnaryExpression -from typing import Optional - +from typing import Optional, Type OrderParam = namedtuple("OrderParam", "name direction") @@ -33,7 +35,7 @@ def split_order_param(order_param: str) -> Optional[OrderParam]: def get_order_param( - cls: Model, order_param: OrderParam, json_sort: dict = None + cls: Model, order_param: OrderParam, json_sort: dict = None, field_map: dict = None ) -> Optional[UnaryExpression]: """Return order by clause parameter for SQL query @@ -43,15 +45,22 @@ def get_order_param( :type order_param: OrderParam :param json_sort: type mapping for sort by json field, e.g. '{"storage": "int"}', defaults to None :type json_sort: dict + :param field_map: mapping for translating public field names to internal DB columns, e.g. '{"size": "disk_usage"}' + :type field_map: dict """ + # translate field name to column name + db_column_name = order_param.name + if field_map and order_param.name in field_map: + db_column_name = field_map[order_param.name] # find candidate for nested json sort - if "." in order_param.name: - col, attr = order_param.name.split(".") + if "." in db_column_name: + col, attr = db_column_name.split(".") else: - col = order_param.name + col = db_column_name attr = None order_attr = cls.__table__.c.get(col, None) if not isinstance(order_attr, Column): + logging.warning("Ignoring invalid order parameter.") return # sort by key in JSON field if attr: @@ -80,7 +89,9 @@ def get_order_param( return order_attr.desc() -def parse_order_params(cls: Model, order_params: str, json_sort: dict = None): +def parse_order_params( + cls: Model, order_params: str, json_sort: dict = None, field_map: dict = None +) -> list[UnaryExpression]: """Convert order parameters in query string to list of order by clauses. 
def get_schema_fields_map(schema: Type[Schema]) -> dict:
    """
    Build a lookup from schema field names to the attribute names they read.

    A field declared with ``attribute=...`` maps to that attribute, any other
    field maps to its own name. This lets callers sort by the API field name
    (e.g. 'size') while the query is ordered by the underlying attribute
    (e.g. 'disk_usage').

    Entries whose declared value is falsy (e.g. overridden with None) and
    Function / Method / Nested / List fields are left out of the mapping.
    """
    # field types that have no single column to sort by
    virtual_types = (fields.Function, fields.Method, fields.Nested, fields.List)
    return {
        field_name: declared.attribute or field_name
        for field_name, declared in schema._declared_fields.items()
        if declared and not isinstance(declared, virtual_types)
    }