From 0a7e114718e5683541d982ad1990f23657bcbf67 Mon Sep 17 00:00:00 2001 From: "kenji.shiokawa" Date: Mon, 9 Mar 2026 13:05:01 +0900 Subject: [PATCH 01/28] Embargo Revision --- .../invenio_records_rest/views.py | 2 + .../invenio-records/invenio_records/api.py | 3 +- modules/weko-admin/weko_admin/config.py | 27 +++- modules/weko-admin/weko_admin/utils.py | 144 +++++++++++++++++ modules/weko-records/weko_records/utils.py | 153 ++++++++++++++++++ .../weko-search-ui/weko_search_ui/config.py | 5 + .../weko-search-ui/weko_search_ui/query.py | 124 +++++++++++++- .../weko-search-ui/weko_search_ui/utils.py | 50 ++++++ scripts/demo/search_management_update.sql | 44 +++++ scripts/demo/update_search_management.sh | 23 +++ 10 files changed, 570 insertions(+), 5 deletions(-) create mode 100644 scripts/demo/search_management_update.sql create mode 100644 scripts/demo/update_search_management.sh diff --git a/modules/invenio-records-rest/invenio_records_rest/views.py b/modules/invenio-records-rest/invenio_records_rest/views.py index fff588810d..367e3947f2 100644 --- a/modules/invenio-records-rest/invenio_records_rest/views.py +++ b/modules/invenio-records-rest/invenio_records_rest/views.py @@ -855,6 +855,8 @@ def url_args_check(): links['next'] = url_for(endpoint, page=page + 1, **urlkwargs) from weko_search_ui.utils import combine_aggs search_result = combine_aggs(search_result.to_dict()) + from weko_search_ui.utils import fix_aggregations_accessrights + search_result = fix_aggregations_accessrights(search_result) return self.make_response( pid_fetcher=self.pid_fetcher, search_result=search_result, diff --git a/modules/invenio-records/invenio_records/api.py b/modules/invenio-records/invenio_records/api.py index 96d6a3ec8d..4d3419cc17 100644 --- a/modules/invenio-records/invenio_records/api.py +++ b/modules/invenio-records/invenio_records/api.py @@ -214,10 +214,11 @@ def __custom_record_metadata(cls, record_metadata: dict): Args: record_metadata (dict): Record metadata. """ - from weko_records.utils import replace_fqdn_of_file_metadata + from weko_records.utils import replace_fqdn_of_file_metadata, update_embargo_rights for k, v in record_metadata.items(): if isinstance(v, dict) and v.get('attribute_type') == 'file': replace_fqdn_of_file_metadata(v.get("attribute_value_mlt", [])) + update_embargo_rights(record_metadata) @classmethod def get_records(cls, ids, with_deleted=False): diff --git a/modules/weko-admin/weko_admin/config.py b/modules/weko-admin/weko_admin/config.py index 6cc81bebc2..11d67a15b9 100644 --- a/modules/weko-admin/weko_admin/config.py +++ b/modules/weko-admin/weko_admin/config.py @@ -1113,7 +1113,30 @@ 'mappingName':'', 'item_value':{'1': {'path': {'type': '', 'coordinates': ''}, 'path_type': {'type': 'json', 'coordinates': 'json'}} } - } + }, + { + "id":"accessrights", + "contents":"", + "contents_value":{"en":"Access Rights","ja":"アクセス権"}, + "useable_status":True, + "mapping":[ + "embargoed access", + "metadata only access", + "open access", + "restricted access", + ], + "check_val":[ + {"id":"embargoed access","contents":"embargoed access","checkStus":False}, + {"id":"metadata only access","contents":"metadata only access","checkStus":False}, + {"id":"open access","contents":"open access","checkStus":False}, + {"id":"restricted access","contents":"restricted access","checkStus":False}, + ], + "default_display":True, + "inputType":"checkbox_list", + "inputVal":"", + "mappingFlg":False, + "mappingName":"" + } ] } @@ -1335,7 +1358,7 @@ """Default restricted access settings.""" WEKO_ADMIN_RESTRICTED_ACCESS_MAX_INTEGER = 9999999 -"""max value of expiration_date and download_limit. +"""max value of expiration_date and download_limit. Any more than this and the datetime may overflow. """ WEKO_ADMIN_ITEMS_PER_PAGE_USAGE_REPORT_REMINDER = 25 diff --git a/modules/weko-admin/weko_admin/utils.py b/modules/weko-admin/weko_admin/utils.py index dc9e4a1194..8b89dffc95 100755 --- a/modules/weko-admin/weko_admin/utils.py +++ b/modules/weko-admin/weko_admin/utils.py @@ -2204,6 +2204,150 @@ def create_aggregations(facets): {'agg_mapping': 'publish_status', 'agg_value': PublishStatus.PUBLIC.value}) agg_no_permission_query.update( create_agg_by_aggregations(facet.aggregations, key, val)) + import copy + # Add aggregation conditions for accessRights + ACCESSRIGHTS_FIX_ENABLED = current_app.config.get("WEKO_SEARCH_FIX_ACCESSRIGHTS", False) + ACCESS_RIGHTS_CHOICES = current_app.config.get( + "WEKO_ACCESS_RIGHTS_CHOICES", + [ + "open access", + "embargoed access", + "restricted access", + "metadata only access", + ] + ) + ACCESS_RIGHTS_QUERY_TEMPLATE = { + "open access": { + "should": [ + {"term": {"accessRights": "open access"}}, + {"term": {"accessRights": "embargoed access"}}, + { + "nested": { + "path": "content", + "query": { + "bool": { + "should": [ + {"term": {"content.accessrole.raw": "open_access"}}, + { + "bool": { + "must": [ + {"term": {"content.accessrole.raw": "open_date"}}, + {"range": {"content.date.dateValue.raw": {"lte": "@date"}}} + ] + } + } + ] + } + } + } + } + ] + }, + "embargoed access": { + "must": [ + {"term": {"accessRights": "embargoed access"}}, + { + "nested": { + "path": "content", + "query": { + "bool": { + "should": [ + { + "bool": { + "must": [ + {"term": {"content.accessrole.raw": "open_date"}}, + {"range": {"content.date.dateValue.raw": {"gt": "@date"}}} + ] + } + } + ] + } + } + } + } + ], + "must_not": [ + { + "nested": { + "path": "content", + "query": {"term": {"content.accessrole.raw": "open_restricted"}} + } + } + ] + }, + "restricted access": { + "should": [ + {"term": {"accessRights": "restricted access"}}, + {"term": {"accessRights": "embargoed access"}}, + { + "nested": { + "path": "content", + "query": {"term": {"content.accessrole.raw": "open_restricted"}} + } + }, + { + "bool": { + "must": [ + { + "nested": { + "path": "content", + "query": {"term": {"content.accessrole.raw": "open_login"}} + } + }, + { + "bool": { + "must_not": [ + { + "nested": { + "path": "content", + "query": { + "bool": { + "must": [ + {"term": {"content.accessrole.raw": "open_date"}}, + {"range": {"content.date.dateValue.raw": {"gt": "@date"}}} + ] + } + } + } + } + ] + } + } + ] + } + } + ] + }, + "metadata only access": { + "term": {"accessRights": "metadata only access"} + } + } + def _replace_date(obj, now): + """Recursively replace @date with current date.""" + if isinstance(obj, dict): + for k, v in obj.items(): + if isinstance(v, str) and v == "@date": + obj[k] = now + else: + _replace_date(v, now) + elif isinstance(obj, list): + for v in obj: + _replace_date(v, now) + if ACCESSRIGHTS_FIX_ENABLED: + access_rights = next((facet for facet in facets if facet.mapping == "accessRights"), None) + if access_rights: + now = datetime.now().strftime("%Y-%m-%d") + must = [dict(term={d["agg_mapping"]: d["agg_value"]}) for d in access_rights.aggregations] + new_access_rights = {"new_accessRights": {"filters": {"filters": {}}}} + for access_type in ACCESS_RIGHTS_CHOICES: + query_template = copy.deepcopy(ACCESS_RIGHTS_QUERY_TEMPLATE.get(access_type, {})) + _replace_date(query_template, now) + must_copy = copy.deepcopy(must) + if query_template: + must_copy.append(query_template) + new_access_rights["new_accessRights"]["filters"]["filters"][access_type] = {"bool": {"must": must_copy}} + agg_has_permission_query.update(new_access_rights) + agg_no_permission_query.update(new_access_rights) return agg_has_permission_query, agg_no_permission_query def create_post_filters(facets): diff --git a/modules/weko-records/weko_records/utils.py b/modules/weko-records/weko_records/utils.py index d78c5978e6..f70d4693ef 100644 --- a/modules/weko-records/weko_records/utils.py +++ b/modules/weko-records/weko_records/utils.py @@ -2864,3 +2864,156 @@ def replace_fqdn_of_file_metadata(file_metadata_lst: list, file_url: list = None file["url"]["url"] = replace_fqdn(file["url"]["url"]) elif isinstance(file_url, list): file_url.append(file["url"]["url"]) + +def check_embargo_rights(access_right: str, today, accessrole_date: list = []): + """ + Determines whether the accessrights value needs to be updated based on the mapped item values and file information, and what value it should be changed to. + Args: + access_right (str): The value mapped to the item's accessRight. + today (date): The current date. + accessrole_date (list): List of (accessrole, date) tuples for registered files. + Returns: + is_update_required (bool): Whether an update is required. + change_value (str): The value after the update. + """ + # Do nothing if not 'embargoed access' + if access_right != "embargoed access": + return False, None + + # 1. If there is at least one 'open_restricted', set to 'restricted access' + if any(role == "open_restricted" for role, _ in accessrole_date): + return True, "restricted access" + + # 2. If there is at least one 'open_date' with a future date, no update required + if any(role == "open_date" and date and today and date > today for role, date in accessrole_date): + return False, None + + # 3. If there is at least one 'open_login', set to 'restricted access' + if any(role == "open_login" for role, _ in accessrole_date): + return True, "restricted access" + + # 4. If all are 'open_access' or 'open_date' with date <= today, set to 'open access' + if accessrole_date and all( + (role == "open_access") or + (role == "open_date" and date and today and date <= today) + for role, date in accessrole_date + ): + return True, "open access" + + return False, None + +def update_embargo_rights(metadata: dict) -> None: + """ + Update accessrights value in item metadata in-place. + Args: + metadata (dict): Item metadata to update. + Returns: + None (modifies metadata in-place) + """ + # Skip if config disables accessrights fix + if not current_app.config.get("WEKO_SEARCH_FIX_ACCESSRIGHTS", False): + return + item_type_id = metadata.get("item_type_id") + if not item_type_id: + return + from weko_records.serializers.utils import get_mapping + mapping = get_mapping(item_type_id, "jpcoar_mapping") + access_path = mapping.get("accessRights.@value") + if not access_path: + return + def _get_nested_value(data, path): + """Recursively get value from nested dict/list by dot-separated path.""" + keys = path.split('.') + for key in keys: + if isinstance(data, dict): + if key in data: + data = data[key] + elif 'attribute_value_mlt' in data: + found = None + for item in data['attribute_value_mlt']: + found = _get_nested_value(item, '.'.join(keys[keys.index(key):])) + if found is not None: + break + data = found + break + else: + data = None + break + elif isinstance(data, list): + found = None + for item in data: + found = _get_nested_value(item, '.'.join([key] + keys[keys.index(key)+1:])) + if found is not None: + break + data = found + break + else: + return None + return data + access_right_value = _get_nested_value(metadata, access_path) + if not access_right_value: + return + from datetime import datetime + accessrole_date = [] + today = datetime.now().date() + for v in metadata.values(): + if isinstance(v, dict) and v.get("attribute_type") == "file": + mlt = v.get("attribute_value_mlt", []) + for data in mlt: + date_val = None + accessrole_val = data.get("accessrole") + if "date" in data and isinstance(data["date"], list) and data["date"]: + date_val = data["date"][0].get("dateValue") + if date_val: + try: + date_val = datetime.strptime(date_val, "%Y-%m-%d").date() + except Exception: + date_val = None + if accessrole_val: + accessrole_date.append((accessrole_val, date_val)) + elif isinstance(v, list): + for data in v: + if isinstance(data, dict) and data.get("attribute_type") == "file": + mlt = data.get("attribute_value_mlt", []) + if mlt: + for file_data in mlt: + date_val = None + accessrole_val = file_data.get("accessrole") + if "date" in file_data and isinstance(file_data["date"], list) and file_data["date"]: + date_val = file_data["date"][0].get("dateValue") + if date_val: + try: + date_val = datetime.strptime(date_val, "%Y-%m-%d").date() + except Exception: + date_val = None + if accessrole_val: + accessrole_date.append((accessrole_val, date_val)) + else: + date_val = None + accessrole_val = data.get("accessrole") + if "date" in data and isinstance(data["date"], list) and data["date"]: + date_val = data["date"][0].get("dateValue") + if date_val: + try: + date_val = datetime.strptime(date_val, "%Y-%m-%d").date() + except Exception: + date_val = None + if accessrole_val: + accessrole_date.append((accessrole_val, date_val)) + from .utils import check_embargo_rights + is_update, change_value = check_embargo_rights(access_right_value, today, accessrole_date) + def _set_nested_value(data, path, value): + keys = path.split('.') + if keys[-1] == 'subitem_access_right' and isinstance(data.get(keys[0]), dict): + target = data[keys[0]] + target[keys[-1]] = value + mlt = target.get('attribute_value_mlt', []) + for item in mlt: + if 'subitem_access_right' in item: + item['subitem_access_right'] = value + else: + for key in keys[:-1]: + data = data.setdefault(key, {}) + data[keys[-1]] = value + if is_update and change_value: + _set_nested_value(metadata, access_path, change_value) diff --git a/modules/weko-search-ui/weko_search_ui/config.py b/modules/weko-search-ui/weko_search_ui/config.py index 6869fca640..b969f19bf9 100644 --- a/modules/weko-search-ui/weko_search_ui/config.py +++ b/modules/weko-search-ui/weko_search_ui/config.py @@ -780,3 +780,8 @@ ROCRATE_METADATA_WK_CONTEXT_V1 = "http://purl.org/wk/v1/wk-context.jsonld" """ Metadata context file name for RO-Crate+Bagit. """ + +WEKO_ACCESS_RIGHTS_CHOICES = [ "embargoed access", "metadata only access", "open access", "restricted access"] + +WEKO_SEARCH_FIX_ACCESSRIGHTS = True +""" If true, the value of accessrights will be modified. """ diff --git a/modules/weko-search-ui/weko_search_ui/query.py b/modules/weko-search-ui/weko_search_ui/query.py index 678a66d8e7..fd83fe38e5 100644 --- a/modules/weko-search-ui/weko_search_ui/query.py +++ b/modules/weko-search-ui/weko_search_ui/query.py @@ -23,7 +23,7 @@ import json import re import sys -from datetime import datetime +from datetime import datetime, timezone from functools import partial from elasticsearch_dsl.query import Bool, Q @@ -603,6 +603,122 @@ def _get_geo_shape_query(k, v, params): return qry + def __get_accessrights_query(params): + """ + Build accessrights search query from request params. + Args: + params (dict): request parameters + Returns: + dict: accessrights search query + """ + weko_search_fix_accessrights = current_app.config.get( + 'WEKO_SEARCH_FIX_ACCESSRIGHTS', False) + if not weko_search_fix_accessrights: + return None + accessrights_value = params.get('accessrights') + if not accessrights_value: + return None + if ',' in accessrights_value: + accessrights_value = ' OR '.join([ + v.strip() for v in accessrights_value.split(',') if v.strip() + ]) + accessrights_list = _split_text_by_or(accessrights_value) + weko_access_rights_choices = current_app.config.get( + 'WEKO_ACCESS_RIGHTS_CHOICES', [ + 'embargoed access', + 'metadata only access', + 'open access', + 'restricted access', + ]) + accessrights_list = [ + v for v in accessrights_list if v in weko_access_rights_choices + ] + if not accessrights_list: + return None + now = datetime.now(timezone.utc).isoformat() + + def open_access_query(now): + """Query for open access.""" + return Q( + 'bool', + should=[ + Q('term', accessRights='open access'), + Q('bool', must=[ + Q('term', accessRights='embargoed access'), + Q('bool', must_not=[ + Q('nested', path='content', query=Q('bool', must_not=[ + Q('term', **{'content.accessrole.raw': 'open_access'}), + Q('bool', must=[ + Q('term', **{'content.accessrole.raw': 'open_date'}), + Q('range', **{'content.date.dateValue.raw': {'lte': now}}) + ]) + ])) + ]) + ]) + ] + ) + + def embargoed_access_query(now): + """Query for embargoed access.""" + return Q( + 'bool', + must=[ + Q('term', accessRights='embargoed access'), + Q('nested', path='content', query=Q('bool', should=[ + Q('bool', must=[ + Q('term', **{'content.accessrole.raw': 'open_date'}), + Q('range', **{'content.date.dateValue.raw': {'gt': now}}) + ]) + ])) + ], + must_not=[ + Q('nested', path='content', query=Q('term', **{'content.accessrole.raw': 'open_restricted'})) + ] + ) + + def restricted_access_query(now): + """Query for restricted access.""" + return Q( + 'bool', + should=[ + Q('term', accessRights='restricted access'), + Q('bool', must=[ + Q('term', accessRights='embargoed access'), + Q('nested', path='content', query=Q('term', **{'content.accessrole.raw': 'open_login'})), + Q('bool', must_not=[ + Q('nested', path='content', query=Q('bool', must=[ + Q('term', **{'content.accessrole.raw': 'open_date'}), + Q('range', **{'content.date.dateValue.raw': {'gt': now}}) + ])) + ]) + ]) + ] + ) + + def metadata_only_query(): + """Query for metadata only access.""" + return Q('term', accessRights='metadata only access') + + queries = [] + for accessright in accessrights_list: + if accessright == 'open access': + queries.append(open_access_query(now)) + elif accessright == 'embargoed access': + queries.append(embargoed_access_query(now)) + elif accessright == 'restricted access': + queries.append(restricted_access_query(now)) + elif accessright == 'metadata only access': + queries.append(metadata_only_query()) + else: + queries.append(Q('term', accessRights=accessright)) + + queries = [q for q in queries if q is not None] + if not queries: + return None + if len(queries) == 1: + return queries[0] + return Q('bool', should=queries, minimum_should_match=1) + params = request.values.to_dict() if additional_params: params.update(additional_params) @@ -665,6 +781,10 @@ def _get_geo_shape_query(k, v, params): if qy: mut.append(qy) + accessrights_q = __get_accessrights_query(params) + if accessrights_q: + mut.append(accessrights_q) + except Exception as e: current_app.logger.exception( "Detail search query parser failed. err:{0}".format(e) @@ -1381,4 +1501,4 @@ def _split_text_by_or(text): pattern = r'(?<= )(?:OR|\|)(?= )' split_text_list = re.split(pattern, text) split_text_list = [item.strip() for item in split_text_list] - return split_text_list \ No newline at end of file + return split_text_list diff --git a/modules/weko-search-ui/weko_search_ui/utils.py b/modules/weko-search-ui/weko_search_ui/utils.py index 3c02cde998..c7bfc95890 100644 --- a/modules/weko-search-ui/weko_search_ui/utils.py +++ b/modules/weko-search-ui/weko_search_ui/utils.py @@ -6149,3 +6149,53 @@ def check_provide_in_system(key, item): is_provide_exist[idx] = False break return all(is_provide_exist) + +def fix_aggregations_accessrights(data): + """ + Refactor accessrights aggregation in search result for compliance. + Args: + data (dict): Aggregation result from search response. + Returns: + dict: Modified aggregation result. + """ + from flask import current_app + from weko_admin.models import FacetSearchSetting + ACCESSRIGHTS_FIX_ENABLED = current_app.config.get("WEKO_SEARCH_FIX_ACCESSRIGHTS", False) + ACCESS_RIGHTS_CHOICES = current_app.config.get( + "WEKO_ACCESS_RIGHTS_CHOICES", + [ + "embargoed access", + "metadata only access", + "open access", + "restricted access", + ] + ) + aggs = data['aggregations'] + if not ACCESSRIGHTS_FIX_ENABLED: + return data + # Get mapping for accessRights facets + mapping = FacetSearchSetting.get_activated_facets_mapping() + accessrights_keys = [k for k, v in mapping.items() if v == "accessRights"] + if "new_accessRights" not in aggs or not accessrights_keys: + return data + new_accessrights = aggs["new_accessRights"] + buckets_dict = new_accessrights.get("buckets", {}) + if not any(right in buckets_dict for right in ACCESS_RIGHTS_CHOICES): + return data + buckets = [] + for right in ACCESS_RIGHTS_CHOICES: + value = buckets_dict.get(right) + if not value: + continue + doc_count = value.get("doc_count", 0) + if doc_count == 0: + continue + buckets.append({"key": right, "doc_count": doc_count}) + # Update buckets for each accessRights facet + for key in accessrights_keys: + if key in aggs: + aggs[key]["buckets"] = buckets + # Remove temporary aggregation + aggs.pop("new_accessRights", None) + data['aggregations'] = aggs + return data diff --git a/scripts/demo/search_management_update.sql b/scripts/demo/search_management_update.sql new file mode 100644 index 0000000000..242daad44c --- /dev/null +++ b/scripts/demo/search_management_update.sql @@ -0,0 +1,44 @@ +DO $$ +DECLARE + rec RECORD; + accessrights_json jsonb := '{ + "id":"accessrights", + "contents":"", + "contents_value":{"en":"Access Rights","ja":"アクセス権"}, + "useable_status":true, + "mapping":["embargoed access","metadata only access","open access","restricted access"], + "check_val":[ + {"id":"embargoed access","contents":"embargoed access","checkStus":false}, + {"id":"metadata only access","contents":"metadata only access","checkStus":false}, + {"id":"open access","contents":"open access","checkStus":false}, + {"id":"restricted access","contents":"restricted access","checkStus":false} + ], + "default_display":true, + "inputType":"checkbox_list", + "inputVal":"", + "mappingFlg":false, + "mappingName":"" + }'::jsonb; +BEGIN + FOR rec IN SELECT id, search_conditions, search_setting_all FROM search_management LOOP + -- search_conditions + IF rec.search_conditions IS NOT NULL THEN + IF NOT EXISTS ( + SELECT 1 FROM jsonb_array_elements(rec.search_conditions) elem + WHERE elem->>'id' = 'accessrights' + ) THEN + UPDATE search_management SET search_conditions = rec.search_conditions || accessrights_json WHERE id = rec.id; + END IF; + END IF; + -- search_setting_all + IF rec.search_setting_all IS NOT NULL AND rec.search_setting_all->'detail_condition' IS NOT NULL THEN + IF NOT (rec.search_setting_all->'detail_condition')::jsonb @> '[{"id": "accessrights"}]'::jsonb THEN + UPDATE search_management SET search_setting_all = jsonb_set( + rec.search_setting_all, + '{detail_condition}', + (rec.search_setting_all->'detail_condition')::jsonb || accessrights_json + ) WHERE id = rec.id; + END IF; + END IF; + END LOOP; +END $$; diff --git a/scripts/demo/update_search_management.sh b/scripts/demo/update_search_management.sh new file mode 100644 index 0000000000..6fc11c208a --- /dev/null +++ b/scripts/demo/update_search_management.sh @@ -0,0 +1,23 @@ +#!/bin/bash +# This script executes SQL only if records exist in the search_management table. + +## Database connection information +DB_USER="invenio" +DB_NAME="invenio" +COMPOSE_FILE="docker-compose2.yml" +POSTGRES_CONTAINER=$(docker-compose -f $COMPOSE_FILE ps -q postgresql) # Get PostgreSQL container ID + +## Get record count from search_management table +record_count=$(docker-compose -f $COMPOSE_FILE exec postgresql psql -U $DB_USER -d $DB_NAME -t -c 'SELECT count(*) FROM search_management;') +record_count=$(echo $record_count | tr -d ' ') + +## If records exist, execute SQL update +if [ "$record_count" -ge 1 ]; then + echo "Records exist in search_management table. Executing SQL." + # Copy SQL file to container + docker cp scripts/demo/search_management_update.sql $POSTGRES_CONTAINER:/tmp/ams_search_management_update.sql + # Execute SQL file + docker-compose -f $COMPOSE_FILE exec postgresql psql -U $DB_USER -d $DB_NAME -f /tmp/ams_search_management_update.sql +else + echo "search_management table is empty. SQL will not be executed." +fi From 468df2889896d44b01aef5b743ded374b887387e Mon Sep 17 00:00:00 2001 From: "kenji.shiokawa" Date: Mon, 9 Mar 2026 14:35:38 +0900 Subject: [PATCH 02/28] fix format --- modules/weko-admin/weko_admin/utils.py | 110 ++++++++++++++---- modules/weko-records/weko_records/utils.py | 90 ++++++++++---- .../weko-search-ui/weko_search_ui/query.py | 20 +++- .../weko-search-ui/weko_search_ui/utils.py | 15 ++- 4 files changed, 187 insertions(+), 48 deletions(-) diff --git a/modules/weko-admin/weko_admin/utils.py b/modules/weko-admin/weko_admin/utils.py index 8b89dffc95..3bc908cf7c 100755 --- a/modules/weko-admin/weko_admin/utils.py +++ b/modules/weko-admin/weko_admin/utils.py @@ -2198,15 +2198,23 @@ def create_aggregations(facets): val = facet.mapping # Update agg query for has permission. agg_has_permission_query.update( - create_agg_by_aggregations(facet.aggregations, key, val)) + create_agg_by_aggregations(facet.aggregations, key, val) + ) # Update agg query for no permission. facet.aggregations.append( - {'agg_mapping': 'publish_status', 'agg_value': PublishStatus.PUBLIC.value}) + { + 'agg_mapping': 'publish_status', + 'agg_value': PublishStatus.PUBLIC.value + } + ) agg_no_permission_query.update( - create_agg_by_aggregations(facet.aggregations, key, val)) + create_agg_by_aggregations(facet.aggregations, key, val) + ) import copy # Add aggregation conditions for accessRights - ACCESSRIGHTS_FIX_ENABLED = current_app.config.get("WEKO_SEARCH_FIX_ACCESSRIGHTS", False) + ACCESSRIGHTS_FIX_ENABLED = current_app.config.get( + "WEKO_SEARCH_FIX_ACCESSRIGHTS", False + ) ACCESS_RIGHTS_CHOICES = current_app.config.get( "WEKO_ACCESS_RIGHTS_CHOICES", [ @@ -2227,12 +2235,24 @@ def create_aggregations(facets): "query": { "bool": { "should": [ - {"term": {"content.accessrole.raw": "open_access"}}, + {"term": { + "content.accessrole.raw": "open_access" + }}, { "bool": { "must": [ - {"term": {"content.accessrole.raw": "open_date"}}, - {"range": {"content.date.dateValue.raw": {"lte": "@date"}}} + { + "term": { + "content.accessrole.raw": "open_date" + } + }, + { + "range": { + "content.date.dateValue.raw": { + "lte": "@date" + } + } + } ] } } @@ -2255,8 +2275,18 @@ def create_aggregations(facets): { "bool": { "must": [ - {"term": {"content.accessrole.raw": "open_date"}}, - {"range": {"content.date.dateValue.raw": {"gt": "@date"}}} + { + "term": { + "content.accessrole.raw": "open_date" + } + }, + { + "range": { + "content.date.dateValue.raw": { + "gt": "@date" + } + } + } ] } } @@ -2270,7 +2300,11 @@ def create_aggregations(facets): { "nested": { "path": "content", - "query": {"term": {"content.accessrole.raw": "open_restricted"}} + "query": { + "term": { + "content.accessrole.raw": "open_restricted" + } + } } } ] @@ -2282,7 +2316,11 @@ def create_aggregations(facets): { "nested": { "path": "content", - "query": {"term": {"content.accessrole.raw": "open_restricted"}} + "query": { + "term": { + "content.accessrole.raw": "open_restricted" + } + } } }, { @@ -2291,7 +2329,11 @@ def create_aggregations(facets): { "nested": { "path": "content", - "query": {"term": {"content.accessrole.raw": "open_login"}} + "query": { + "term": { + "content.accessrole.raw": "open_login" + } + } } }, { @@ -2303,8 +2345,18 @@ def create_aggregations(facets): "query": { "bool": { "must": [ - {"term": {"content.accessrole.raw": "open_date"}}, - {"range": {"content.date.dateValue.raw": {"gt": "@date"}}} + { + "term": { + "content.accessrole.raw": "open_date" + } + }, + { + "range": { + "content.date.dateValue.raw": { + "gt": "@date" + } + } + } ] } } @@ -2319,7 +2371,9 @@ def create_aggregations(facets): ] }, "metadata only access": { - "term": {"accessRights": "metadata only access"} + "term": { + "accessRights": "metadata only access" + } } } def _replace_date(obj, now): @@ -2334,18 +2388,34 @@ def _replace_date(obj, now): for v in obj: _replace_date(v, now) if ACCESSRIGHTS_FIX_ENABLED: - access_rights = next((facet for facet in facets if facet.mapping == "accessRights"), None) + access_rights = next( + (facet for facet in facets if facet.mapping == "accessRights"), + None + ) if access_rights: now = datetime.now().strftime("%Y-%m-%d") - must = [dict(term={d["agg_mapping"]: d["agg_value"]}) for d in access_rights.aggregations] - new_access_rights = {"new_accessRights": {"filters": {"filters": {}}}} + must = [ + dict(term={d["agg_mapping"]: d["agg_value"]}) + for d in access_rights.aggregations + ] + new_access_rights = { + "new_accessRights": { + "filters": { + "filters": {} + } + } + } for access_type in ACCESS_RIGHTS_CHOICES: - query_template = copy.deepcopy(ACCESS_RIGHTS_QUERY_TEMPLATE.get(access_type, {})) + query_template = copy.deepcopy( + ACCESS_RIGHTS_QUERY_TEMPLATE.get(access_type, {}) + ) _replace_date(query_template, now) must_copy = copy.deepcopy(must) if query_template: must_copy.append(query_template) - new_access_rights["new_accessRights"]["filters"]["filters"][access_type] = {"bool": {"must": must_copy}} + new_access_rights["new_accessRights"]["filters"]["filters"][ + access_type + ] = {"bool": {"must": must_copy}} agg_has_permission_query.update(new_access_rights) agg_no_permission_query.update(new_access_rights) return agg_has_permission_query, agg_no_permission_query diff --git a/modules/weko-records/weko_records/utils.py b/modules/weko-records/weko_records/utils.py index f70d4693ef..3400b3adbc 100644 --- a/modules/weko-records/weko_records/utils.py +++ b/modules/weko-records/weko_records/utils.py @@ -2867,11 +2867,14 @@ def replace_fqdn_of_file_metadata(file_metadata_lst: list, file_url: list = None def check_embargo_rights(access_right: str, today, accessrole_date: list = []): """ - Determines whether the accessrights value needs to be updated based on the mapped item values and file information, and what value it should be changed to. + Determines whether the accessrights value needs to be updated based on + the mapped item values and file information, and what value it should be + changed to. Args: access_right (str): The value mapped to the item's accessRight. today (date): The current date. - accessrole_date (list): List of (accessrole, date) tuples for registered files. + accessrole_date (list): List of (accessrole, date) tuples for + registered files. Returns: is_update_required (bool): Whether an update is required. change_value (str): The value after the update. @@ -2881,18 +2884,26 @@ def check_embargo_rights(access_right: str, today, accessrole_date: list = []): return False, None # 1. If there is at least one 'open_restricted', set to 'restricted access' - if any(role == "open_restricted" for role, _ in accessrole_date): + if any( + role == "open_restricted" for role, _ in accessrole_date + ): return True, "restricted access" # 2. If there is at least one 'open_date' with a future date, no update required - if any(role == "open_date" and date and today and date > today for role, date in accessrole_date): + if any( + role == "open_date" and date and today and date > today + for role, date in accessrole_date + ): return False, None # 3. If there is at least one 'open_login', set to 'restricted access' - if any(role == "open_login" for role, _ in accessrole_date): + if any( + role == "open_login" for role, _ in accessrole_date + ): return True, "restricted access" - # 4. If all are 'open_access' or 'open_date' with date <= today, set to 'open access' + # 4. If all are 'open_access' or 'open_date' with date <= today, + # set to 'open access' if accessrole_date and all( (role == "open_access") or (role == "open_date" and date and today and date <= today) @@ -2913,14 +2924,17 @@ def update_embargo_rights(metadata: dict) -> None: # Skip if config disables accessrights fix if not current_app.config.get("WEKO_SEARCH_FIX_ACCESSRIGHTS", False): return + item_type_id = metadata.get("item_type_id") if not item_type_id: return + from weko_records.serializers.utils import get_mapping mapping = get_mapping(item_type_id, "jpcoar_mapping") access_path = mapping.get("accessRights.@value") if not access_path: return + def _get_nested_value(data, path): """Recursively get value from nested dict/list by dot-separated path.""" keys = path.split('.') @@ -2931,7 +2945,10 @@ def _get_nested_value(data, path): elif 'attribute_value_mlt' in data: found = None for item in data['attribute_value_mlt']: - found = _get_nested_value(item, '.'.join(keys[keys.index(key):])) + found = _get_nested_value( + item, + '.'.join(keys[keys.index(key):]) + ) if found is not None: break data = found @@ -2942,7 +2959,10 @@ def _get_nested_value(data, path): elif isinstance(data, list): found = None for item in data: - found = _get_nested_value(item, '.'.join([key] + keys[keys.index(key)+1:])) + found = _get_nested_value( + item, + '.'.join([key] + keys[keys.index(key)+1:]) + ) if found is not None: break data = found @@ -2950,61 +2970,88 @@ def _get_nested_value(data, path): else: return None return data + access_right_value = _get_nested_value(metadata, access_path) if not access_right_value: return + from datetime import datetime accessrole_date = [] today = datetime.now().date() + for v in metadata.values(): - if isinstance(v, dict) and v.get("attribute_type") == "file": + if ( + isinstance(v, dict) and v.get("attribute_type") == "file" + ): mlt = v.get("attribute_value_mlt", []) for data in mlt: date_val = None accessrole_val = data.get("accessrole") - if "date" in data and isinstance(data["date"], list) and data["date"]: + if ( + "date" in data and + isinstance(data["date"], list) and + data["date"] + ): date_val = data["date"][0].get("dateValue") if date_val: - try: + if re.match(r"^\d{4}-\d{2}-\d{2}$", date_val): date_val = datetime.strptime(date_val, "%Y-%m-%d").date() - except Exception: + else: date_val = None if accessrole_val: accessrole_date.append((accessrole_val, date_val)) elif isinstance(v, list): for data in v: - if isinstance(data, dict) and data.get("attribute_type") == "file": + if ( + isinstance(data, dict) and + data.get("attribute_type") == "file" + ): mlt = data.get("attribute_value_mlt", []) if mlt: for file_data in mlt: date_val = None accessrole_val = file_data.get("accessrole") - if "date" in file_data and isinstance(file_data["date"], list) and file_data["date"]: + if ( + "date" in file_data and + isinstance(file_data["date"], list) and + file_data["date"] + ): date_val = file_data["date"][0].get("dateValue") if date_val: - try: + if re.match(r"^\d{4}-\d{2}-\d{2}$", date_val): date_val = datetime.strptime(date_val, "%Y-%m-%d").date() - except Exception: + else: date_val = None if accessrole_val: accessrole_date.append((accessrole_val, date_val)) else: date_val = None accessrole_val = data.get("accessrole") - if "date" in data and isinstance(data["date"], list) and data["date"]: + if ( + "date" in data and + isinstance(data["date"], list) and + data["date"] + ): date_val = data["date"][0].get("dateValue") if date_val: - try: + if re.match(r"^\d{4}-\d{2}-\d{2}$", date_val): date_val = datetime.strptime(date_val, "%Y-%m-%d").date() - except Exception: + else: date_val = None if accessrole_val: accessrole_date.append((accessrole_val, date_val)) + from .utils import check_embargo_rights - is_update, change_value = check_embargo_rights(access_right_value, today, accessrole_date) + is_update, change_value = check_embargo_rights( + access_right_value, today, accessrole_date + ) + def _set_nested_value(data, path, value): keys = path.split('.') - if keys[-1] == 'subitem_access_right' and isinstance(data.get(keys[0]), dict): + if ( + keys[-1] == 'subitem_access_right' and + isinstance(data.get(keys[0]), dict) + ): target = data[keys[0]] target[keys[-1]] = value mlt = target.get('attribute_value_mlt', []) @@ -3015,5 +3062,6 @@ def _set_nested_value(data, path, value): for key in keys[:-1]: data = data.setdefault(key, {}) data[keys[-1]] = value + if is_update and change_value: _set_nested_value(metadata, access_path, change_value) diff --git a/modules/weko-search-ui/weko_search_ui/query.py b/modules/weko-search-ui/weko_search_ui/query.py index fd83fe38e5..8651e4bef4 100644 --- a/modules/weko-search-ui/weko_search_ui/query.py +++ b/modules/weko-search-ui/weko_search_ui/query.py @@ -612,16 +612,20 @@ def __get_accessrights_query(params): dict: accessrights search query """ weko_search_fix_accessrights = current_app.config.get( - 'WEKO_SEARCH_FIX_ACCESSRIGHTS', False) + 'WEKO_SEARCH_FIX_ACCESSRIGHTS', False + ) if not weko_search_fix_accessrights: return None + accessrights_value = params.get('accessrights') if not accessrights_value: return None + if ',' in accessrights_value: accessrights_value = ' OR '.join([ v.strip() for v in accessrights_value.split(',') if v.strip() ]) + accessrights_list = _split_text_by_or(accessrights_value) weko_access_rights_choices = current_app.config.get( 'WEKO_ACCESS_RIGHTS_CHOICES', [ @@ -629,12 +633,14 @@ def __get_accessrights_query(params): 'metadata only access', 'open access', 'restricted access', - ]) + ] + ) accessrights_list = [ v for v in accessrights_list if v in weko_access_rights_choices ] if not accessrights_list: return None + now = datetime.now(timezone.utc).isoformat() def open_access_query(now): @@ -672,7 +678,10 @@ def embargoed_access_query(now): ])) ], must_not=[ - Q('nested', path='content', query=Q('term', **{'content.accessrole.raw': 'open_restricted'})) + Q( + 'nested', path='content', + query=Q('term', **{'content.accessrole.raw': 'open_restricted'}) + ) ] ) @@ -684,7 +693,10 @@ def restricted_access_query(now): Q('term', accessRights='restricted access'), Q('bool', must=[ Q('term', accessRights='embargoed access'), - Q('nested', path='content', query=Q('term', **{'content.accessrole.raw': 'open_login'})), + Q( + 'nested', path='content', + query=Q('term', **{'content.accessrole.raw': 'open_login'}) + ), Q('bool', must_not=[ Q('nested', path='content', query=Q('bool', must=[ Q('term', **{'content.accessrole.raw': 'open_date'}), diff --git a/modules/weko-search-ui/weko_search_ui/utils.py b/modules/weko-search-ui/weko_search_ui/utils.py index c7bfc95890..267ca7d9bc 100644 --- a/modules/weko-search-ui/weko_search_ui/utils.py +++ b/modules/weko-search-ui/weko_search_ui/utils.py @@ -6160,7 +6160,9 @@ def fix_aggregations_accessrights(data): """ from flask import current_app from weko_admin.models import FacetSearchSetting - ACCESSRIGHTS_FIX_ENABLED = current_app.config.get("WEKO_SEARCH_FIX_ACCESSRIGHTS", False) + ACCESSRIGHTS_FIX_ENABLED = current_app.config.get( + "WEKO_SEARCH_FIX_ACCESSRIGHTS", False + ) ACCESS_RIGHTS_CHOICES = current_app.config.get( "WEKO_ACCESS_RIGHTS_CHOICES", [ @@ -6173,15 +6175,22 @@ def fix_aggregations_accessrights(data): aggs = data['aggregations'] if not ACCESSRIGHTS_FIX_ENABLED: return data + # Get mapping for accessRights facets mapping = FacetSearchSetting.get_activated_facets_mapping() - accessrights_keys = [k for k, v in mapping.items() if v == "accessRights"] + accessrights_keys = [ + k for k, v in mapping.items() if v == "accessRights" + ] if "new_accessRights" not in aggs or not accessrights_keys: return data + new_accessrights = aggs["new_accessRights"] buckets_dict = new_accessrights.get("buckets", {}) - if not any(right in buckets_dict for right in ACCESS_RIGHTS_CHOICES): + if not any( + right in buckets_dict for right in ACCESS_RIGHTS_CHOICES + ): return data + buckets = [] for right in ACCESS_RIGHTS_CHOICES: value = buckets_dict.get(right) From 795d7cc85a04c1e3c8b3501f511ff61691c49faa Mon Sep 17 00:00:00 2001 From: "kenji.shiokawa" Date: Tue, 10 Mar 2026 17:50:42 +0900 Subject: [PATCH 03/28] fix create_aggregations --- modules/weko-admin/weko_admin/utils.py | 114 +++++++++---------------- 1 file changed, 41 insertions(+), 73 deletions(-) diff --git a/modules/weko-admin/weko_admin/utils.py b/modules/weko-admin/weko_admin/utils.py index 3bc908cf7c..97135ea19b 100755 --- a/modules/weko-admin/weko_admin/utils.py +++ b/modules/weko-admin/weko_admin/utils.py @@ -2225,45 +2225,42 @@ def create_aggregations(facets): ] ) ACCESS_RIGHTS_QUERY_TEMPLATE = { - "open access": { + "open access": { + "bool": { "should": [ {"term": {"accessRights": "open access"}}, - {"term": {"accessRights": "embargoed access"}}, { - "nested": { - "path": "content", - "query": { - "bool": { - "should": [ - {"term": { - "content.accessrole.raw": "open_access" - }}, - { + "bool": { + "must": [ + {"term": {"accessRights": "embargoed access"}}, + { + "nested": { + "path": "content", + "query": { "bool": { - "must": [ - { - "term": { - "content.accessrole.raw": "open_date" - } - }, + "must_not": [ + {"term": {"content.accessrole.raw": "open_access"}}, { - "range": { - "content.date.dateValue.raw": { - "lte": "@date" - } + "bool": { + "must": [ + {"term": {"content.accessrole.raw": "open_date"}}, + {"range": {"content.date.dateValue.raw": {"lte": "@date"}}} + ] } } ] } } - ] + } } - } + ] } } ] - }, - "embargoed access": { + } + }, + "embargoed access": { + "bool": { "must": [ {"term": {"accessRights": "embargoed access"}}, { @@ -2275,18 +2272,8 @@ def create_aggregations(facets): { "bool": { "must": [ - { - "term": { - "content.accessrole.raw": "open_date" - } - }, - { - "range": { - "content.date.dateValue.raw": { - "gt": "@date" - } - } - } + {"term": {"content.accessrole.raw": "open_date"}}, + {"range": {"content.date.dateValue.raw": {"gt": "@date"}}} ] } } @@ -2301,38 +2288,26 @@ def create_aggregations(facets): "nested": { "path": "content", "query": { - "term": { - "content.accessrole.raw": "open_restricted" - } + "term": {"content.accessrole.raw": "open_restricted"} } } } ] - }, - "restricted access": { + } + }, + "restricted access": { + "bool": { "should": [ {"term": {"accessRights": "restricted access"}}, - {"term": {"accessRights": "embargoed access"}}, - { - "nested": { - "path": "content", - "query": { - "term": { - "content.accessrole.raw": "open_restricted" - } - } - } - }, { "bool": { "must": [ + {"term": {"accessRights": "embargoed access"}}, { "nested": { "path": "content", "query": { - "term": { - "content.accessrole.raw": "open_login" - } + "term": {"content.accessrole.raw": "open_login"} } } }, @@ -2345,18 +2320,8 @@ def create_aggregations(facets): "query": { "bool": { "must": [ - { - "term": { - "content.accessrole.raw": "open_date" - } - }, - { - "range": { - "content.date.dateValue.raw": { - "gt": "@date" - } - } - } + {"term": {"content.accessrole.raw": "open_date"}}, + {"range": {"content.date.dateValue.raw": {"gt": "@date"}}} ] } } @@ -2369,12 +2334,15 @@ def create_aggregations(facets): } } ] - }, - "metadata only access": { - "term": { - "accessRights": "metadata only access" - } } + }, + "metadata only access": { + "bool": { + "must": [ + {"term": {"accessRights": "metadata only access"}} + ] + } + } } def _replace_date(obj, now): """Recursively replace @date with current date.""" From cd3604495dd8b8c0c2fba577fc6d133ee2b963c5 Mon Sep 17 00:00:00 2001 From: "kenji.shiokawa" Date: Wed, 11 Mar 2026 17:50:15 +0900 Subject: [PATCH 04/28] fix Embargo revison --- .../invenio_records_rest/facets.py | 17 ++++++++++---- .../invenio_records_rest/views.py | 3 +-- modules/weko-admin/weko_admin/utils.py | 22 +++++++++---------- .../weko-search-ui/weko_search_ui/query.py | 9 +------- .../weko-search-ui/weko_search_ui/utils.py | 4 +--- 5 files changed, 27 insertions(+), 28 deletions(-) diff --git a/modules/invenio-records-rest/invenio_records_rest/facets.py b/modules/invenio-records-rest/invenio_records_rest/facets.py index 280c8f7321..a485e2aa4b 100644 --- a/modules/invenio-records-rest/invenio_records_rest/facets.py +++ b/modules/invenio-records-rest/invenio_records_rest/facets.py @@ -97,10 +97,19 @@ def _create_filter_dsl(urlkwargs, definitions): for name, filter_factory in definitions.items(): values = request.values.getlist(name, type=text_type) if values: - filters.append(filter_factory(values)) - for v in values: - urlkwargs.add(name, v) - + if name in ("Access", "accessRights") and "new_accessRights" in definitions: + new_accessrights_filters = definitions["new_accessRights"]["filters"]["filters"] + access_rights_queries = [] + for v in values: + if v in new_accessrights_filters: + access_rights_queries.append(Q(new_accessrights_filters[v])) + urlkwargs.add(name, v) + if access_rights_queries: + filters.append(Q('bool', should=access_rights_queries)) + else: + filters.append(filter_factory(values)) + for v in values: + urlkwargs.add(name, v) return (filters, urlkwargs) diff --git a/modules/invenio-records-rest/invenio_records_rest/views.py b/modules/invenio-records-rest/invenio_records_rest/views.py index 367e3947f2..dd073f60a4 100644 --- a/modules/invenio-records-rest/invenio_records_rest/views.py +++ b/modules/invenio-records-rest/invenio_records_rest/views.py @@ -853,9 +853,8 @@ def url_args_check(): if size * page < search_result.hits.total and \ size * page < self.max_result_window: links['next'] = url_for(endpoint, page=page + 1, **urlkwargs) - from weko_search_ui.utils import combine_aggs + from weko_search_ui.utils import combine_aggs, fix_aggregations_accessrights search_result = combine_aggs(search_result.to_dict()) - from weko_search_ui.utils import fix_aggregations_accessrights search_result = fix_aggregations_accessrights(search_result) return self.make_response( pid_fetcher=self.pid_fetcher, diff --git a/modules/weko-admin/weko_admin/utils.py b/modules/weko-admin/weko_admin/utils.py index 97135ea19b..ff367ea102 100755 --- a/modules/weko-admin/weko_admin/utils.py +++ b/modules/weko-admin/weko_admin/utils.py @@ -25,6 +25,7 @@ import os import traceback import zipfile +import copy from datetime import datetime, timedelta from io import BytesIO, StringIO from typing import Dict, Optional, Tuple, Union @@ -2210,7 +2211,6 @@ def create_aggregations(facets): agg_no_permission_query.update( create_agg_by_aggregations(facet.aggregations, key, val) ) - import copy # Add aggregation conditions for accessRights ACCESSRIGHTS_FIX_ENABLED = current_app.config.get( "WEKO_SEARCH_FIX_ACCESSRIGHTS", False @@ -2238,16 +2238,9 @@ def create_aggregations(facets): "path": "content", "query": { "bool": { - "must_not": [ - {"term": {"content.accessrole.raw": "open_access"}}, - { - "bool": { - "must": [ - {"term": {"content.accessrole.raw": "open_date"}}, - {"range": {"content.date.dateValue.raw": {"lte": "@date"}}} - ] - } - } + "must": [ + {"term": {"content.accessrole.raw": "open_date"}}, + {"range": {"content.date.dateValue.raw": {"lte": "@date"}}} ] } } @@ -2406,6 +2399,11 @@ def create_post_filters(facets): agg_has_permission, agg_no_permission = create_aggregations( activated_facets) post_filters = create_post_filters(activated_facets) + ACCESSRIGHTS_FIX_ENABLED = current_app.config.get( + "WEKO_SEARCH_FIX_ACCESSRIGHTS", False + ) + if ACCESSRIGHTS_FIX_ENABLED and "new_accessRights" in agg_has_permission: + post_filters["new_accessRights"] = agg_has_permission["new_accessRights"] # Create facet search query for has permission. has_permission_query[search_index] = dict( aggs=agg_has_permission, @@ -2457,6 +2455,8 @@ def get_facet_search_query(has_permission=True): from weko_admin.utils import get_title_facets titles, order, uiTypes, isOpens, displayNumbers, searchConditions = get_title_facets() for k, v in post_filters.items(): + if k == "new_accessRights": + continue if v == 'temporal': # If the mapping name is [template], it is assumed to be a Filter to date_range1. post_filters.update({k: range_filter('date_range1', False, False)}) diff --git a/modules/weko-search-ui/weko_search_ui/query.py b/modules/weko-search-ui/weko_search_ui/query.py index 8651e4bef4..1cb5ad011c 100644 --- a/modules/weko-search-ui/weko_search_ui/query.py +++ b/modules/weko-search-ui/weko_search_ui/query.py @@ -621,12 +621,7 @@ def __get_accessrights_query(params): if not accessrights_value: return None - if ',' in accessrights_value: - accessrights_value = ' OR '.join([ - v.strip() for v in accessrights_value.split(',') if v.strip() - ]) - - accessrights_list = _split_text_by_or(accessrights_value) + accessrights_list = [v.strip() for v in accessrights_value.split(',') if v.strip()] weko_access_rights_choices = current_app.config.get( 'WEKO_ACCESS_RIGHTS_CHOICES', [ 'embargoed access', @@ -721,8 +716,6 @@ def metadata_only_query(): queries.append(restricted_access_query(now)) elif accessright == 'metadata only access': queries.append(metadata_only_query()) - else: - queries.append(Q('term', accessRights=accessright)) queries = [q for q in queries if q is not None] if not queries: diff --git a/modules/weko-search-ui/weko_search_ui/utils.py b/modules/weko-search-ui/weko_search_ui/utils.py index 267ca7d9bc..7bef7d1bb4 100644 --- a/modules/weko-search-ui/weko_search_ui/utils.py +++ b/modules/weko-search-ui/weko_search_ui/utils.py @@ -73,7 +73,7 @@ from sqlalchemy import func as _func from sqlalchemy.exc import SQLAlchemyError -from weko_admin.models import AdminSettings, SessionLifetime +from weko_admin.models import AdminSettings, SessionLifetime, FacetSearchSetting from weko_admin.utils import get_redis_cache, reset_redis_cache, get_restricted_access from weko_admin.api import TempDirInfo from weko_authors.models import AuthorsAffiliationSettings, AuthorsPrefixSettings @@ -6158,8 +6158,6 @@ def fix_aggregations_accessrights(data): Returns: dict: Modified aggregation result. """ - from flask import current_app - from weko_admin.models import FacetSearchSetting ACCESSRIGHTS_FIX_ENABLED = current_app.config.get( "WEKO_SEARCH_FIX_ACCESSRIGHTS", False ) From 7e8bd03026b4efe8877383dfe551094da9370278 Mon Sep 17 00:00:00 2001 From: "kenji.shiokawa" Date: Tue, 17 Mar 2026 09:20:53 +0900 Subject: [PATCH 05/28] Embargo Unit Test --- .../invenio-records-rest/tests/test_facets.py | 53 ++- modules/weko-admin/tests/test_utils.py | 281 ++++++++++---- modules/weko-records/tests/test_utils.py | 364 +++++++++++++++++- modules/weko-records/weko_records/utils.py | 126 ++---- modules/weko-search-ui/tests/test_query.py | 53 +++ modules/weko-search-ui/tests/test_utils.py | 68 +++- .../weko-search-ui/weko_search_ui/query.py | 4 +- .../weko-search-ui/weko_search_ui/utils.py | 11 +- 8 files changed, 784 insertions(+), 176 deletions(-) diff --git a/modules/invenio-records-rest/tests/test_facets.py b/modules/invenio-records-rest/tests/test_facets.py index 27f25a5716..3c4a7ef40b 100644 --- a/modules/invenio-records-rest/tests/test_facets.py +++ b/modules/invenio-records-rest/tests/test_facets.py @@ -85,6 +85,57 @@ def test_create_filter_dsl(): assert not filters assert args == kwargs +def test_create_filter_dsl_accessrights(): + app = Flask('testapp') + # Definition of new_accessRights + definitions = { + "accessRights": lambda values: Q('terms', accessRights=values), + "new_accessRights": { + "filters": { + "filters": { + "open": {"term": {"accessRights": "open"}}, + "closed": {"term": {"accessRights": "closed"}}, + } + } + } + } + # Values included in new_accessrights_filters + with app.test_request_context('?accessRights=open&accessRights=closed'): + kwargs = MultiDict() + filters, args = _create_filter_dsl(kwargs, definitions) + assert len(filters) == 1 + assert filters[0].to_dict() == { + "bool": { + "should": [ + {"term": {"accessRights": "open"}}, + {"term": {"accessRights": "closed"}} + ] + } + } + assert args.getlist('accessRights') == ['open', 'closed'] + + # Values not included in new_accessrights_filters + with app.test_request_context('?accessRights=unknown'): + kwargs = MultiDict() + filters, args = _create_filter_dsl(kwargs, definitions) + assert filters == [] + assert args.getlist('accessRights') == [] + + # When the request value is empty + with app.test_request_context(''): + kwargs = MultiDict() + filters, args = _create_filter_dsl(kwargs, definitions) + assert filters == [] + assert args == kwargs + + definitions = { + "other": lambda values: Q('terms', other=values), + } + with app.test_request_context('?other=open'): + kwargs = MultiDict() + filters, args = _create_filter_dsl(kwargs, definitions) + assert filters == [Q('terms', other=['open'])] + assert args.getlist('other') == ['open'] def test_post_filter(app): """Test post filter.""" @@ -242,4 +293,4 @@ def test_default_facets_factory(app, db, search_user, redis_connect): assert 'aggs' not in search.to_dict() assert 'post_filter' not in search.to_dict() assert 'bool' not in search.to_dict()['query'] - redis_connect.delete(test_redis_key) \ No newline at end of file + redis_connect.delete(test_redis_key) diff --git a/modules/weko-admin/tests/test_utils.py b/modules/weko-admin/tests/test_utils.py index 97d0fa57da..fe8c86f7e2 100755 --- a/modules/weko-admin/tests/test_utils.py +++ b/modules/weko-admin/tests/test_utils.py @@ -1927,7 +1927,7 @@ def test_update_restricted_access(admin_settings,mocker): result = update_restricted_access(data) mock_called.assert_not_called() mock_called.reset_mock() - + data = { "edit_mail_templates_enable": False } @@ -2171,6 +2171,207 @@ def test_create_facet_search_query(facet_search_settings): assert has_permission == test_has_permission assert no_permission == test_no_permission +from flask import Flask, current_app +from datetime import datetime +from weko_admin.utils import create_facet_search_query + +class DummyFacet: + def __init__(self, name_en, mapping, aggregations): + self.name_en = name_en + self.mapping = mapping + self.aggregations = aggregations + +# .tox/c1/bin/pytest --cov=weko_admin tests/test_utils.py::test_create_aggregations_branch -vv -s --cov-branch --cov-report=term --basetemp=/code/modules/weko-admin/.tox/c1/tmp +def test_create_aggregations_branch(mocker): + ACCESS_RIGHTS_QUERY_TEMPLATE = { + "open access": { + "bool": { + "should": [ + {"term": {"accessRights": "open access"}}, + { + "bool": { + "must": [ + {"term": {"accessRights": "embargoed access"}}, + { + "nested": { + "path": "content", + "query": { + "bool": { + "must": [ + {"term": {"content.accessrole.raw": "open_date"}}, + {"range": {"content.date.dateValue.raw": {"lte": "@date"}}} + ] + } + } + } + } + ] + } + } + ] + } + }, + "embargoed access": { + "bool": { + "must": [ + {"term": {"accessRights": "embargoed access"}}, + { + "nested": { + "path": "content", + "query": { + "bool": { + "should": [ + { + "bool": { + "must": [ + {"term": {"content.accessrole.raw": "open_date"}}, + {"range": {"content.date.dateValue.raw": {"gt": "@date"}}} + ] + } + } + ] + } + } + } + } + ], + "must_not": [ + { + "nested": { + "path": "content", + "query": { + "term": {"content.accessrole.raw": "open_restricted"} + } + } + } + ] + } + }, + "restricted access": { + "bool": { + "should": [ + {"term": {"accessRights": "restricted access"}}, + { + "bool": { + "must": [ + {"term": {"accessRights": "embargoed access"}}, + { + "nested": { + "path": "content", + "query": { + "term": {"content.accessrole.raw": "open_login"} + } + } + }, + { + "bool": { + "must_not": [ + { + "nested": { + "path": "content", + "query": { + "bool": { + "must": [ + {"term": {"content.accessrole.raw": "open_date"}}, + {"range": {"content.date.dateValue.raw": {"gt": "@date"}}} + ] + } + } + } + } + ] + } + } + ] + } + } + ] + } + }, + "metadata only access": { + "bool": { + "must": [ + {"term": {"accessRights": "metadata only access"}} + ] + } + } + } + # 1. ACCESSRIGHTS_FIX_ENABLED=False + app = Flask(__name__) + app.config["SEARCH_UI_SEARCH_INDEX"] = "testidx" + app.config["WEKO_SEARCH_FIX_ACCESSRIGHTS"] = False + mocker.patch("weko_admin.models.FacetSearchSetting.get_activated_facets", return_value=[DummyFacet("type", "type", [])]) + with app.app_context(): + has_permission, no_permission = create_facet_search_query() + aggs = has_permission["testidx"]["aggs"] + print("--- new_accessRights aggregation ---") + import pprint + pprint.pprint(aggs.get("new_accessRights")) + assert "new_accessRights" not in aggs + + # 2. ACCESSRIGHTS_FIX_ENABLED=True, no accessRights facet + app = Flask(__name__) + app.config["SEARCH_UI_SEARCH_INDEX"] = "testidx" + app.config["WEKO_SEARCH_FIX_ACCESSRIGHTS"] = True + mocker.patch("weko_admin.models.FacetSearchSetting.get_activated_facets", return_value=[DummyFacet("type", "type", [])]) + with app.app_context(): + has_permission, no_permission = create_facet_search_query() + aggs = has_permission["testidx"]["aggs"] + assert "new_accessRights" not in aggs + + # 3. ACCESSRIGHTS_FIX_ENABLED=True, with accessRights facet + app = Flask(__name__) + app.config["SEARCH_UI_SEARCH_INDEX"] = "testidx" + app.config["WEKO_SEARCH_FIX_ACCESSRIGHTS"] = True + app.config["WEKO_ACCESS_RIGHTS_CHOICES"] = [ + "open access", "embargoed access", "restricted access", "metadata only access" + ] + mocker.patch("weko_admin.models.FacetSearchSetting.get_activated_facets", return_value=[ + DummyFacet("accessRights", "accessRights", [ + {"agg_mapping": "publish_status", "agg_value": 0} + ]) + ]) + with app.app_context(): + has_permission, no_permission = create_facet_search_query() + aggs = has_permission["testidx"]["aggs"] + assert "new_accessRights" in aggs + filters = aggs["new_accessRights"]["filters"]["filters"] + for access_type in app.config["WEKO_ACCESS_RIGHTS_CHOICES"]: + assert access_type in filters + assert "bool" in filters[access_type] + import copy + from datetime import datetime + for access_type in app.config["WEKO_ACCESS_RIGHTS_CHOICES"]: + template = copy.deepcopy(ACCESS_RIGHTS_QUERY_TEMPLATE[access_type]) + actual_bool = filters[access_type]["bool"] + # Replace @date in the template with today's date + def replace_date(obj, today): + if isinstance(obj, dict): + for k, v in obj.items(): + if isinstance(v, str) and v == "@date": + obj[k] = today + else: + replace_date(v, today) + elif isinstance(obj, list): + for v in obj: + replace_date(v, today) + today = datetime.now().strftime("%Y-%m-%d") + replace_date(template, today) + # After the publish_status term condition, check if the template structure matches + must_list = actual_bool.get("must", []) + access_query = must_list[2] if len(must_list) > 2 else must_list[-1] + assert access_query == template + + # When facet mapping includes “fields.raw” + app = Flask(__name__) + app.config["SEARCH_UI_SEARCH_INDEX"] = "testidx" + mocker.patch("weko_admin.models.FacetSearchSetting.get_activated_facets", return_value=[ + DummyFacet("raw_test", "test.fields.raw", []) + ]) + with app.app_context(): + has_permission, no_permission = create_facet_search_query() + post_filters = has_permission["testidx"]["post_filters"] + assert post_filters["raw_test"] == "test.raw" # def store_facet_search_query_in_redis(): # .tox/c1/bin/pytest --cov=weko_admin tests/test_utils.py::test_store_facet_search_query_in_redis -vv -s --cov-branch --cov-report=term --basetemp=/code/modules/weko-admin/.tox/c1/tmp @@ -2189,7 +2390,12 @@ def test_store_facet_search_query_in_redis(mocker): 'aggs': {'Data Language': {'aggs': {'Data Language': {'terms': {'field': 'language','size': 1000}}}, 'filter': {'bool': {'must': [{'term': {'publish_status': '0'}}]}}}, 'Data Type': {'aggs': {'Data Type': {'terms': {'field': 'description.value','size': 1000}}}, - 'filter': {'bool': {'must': [{'term': {'description.descriptionType': 'Other'}},{'term': {'publish_status': '0'}}]}}}}, + 'filter': {'bool': {'must': [{'term': {'description.descriptionType': 'Other'}},{'term': {'publish_status': '0'}}]}}}, + 'Time Period(s)': {'aggs': {'Time Period(s)': {'terms':{'field': 'temporal','size':1000}}}, + 'filter':{'bool':{'must':[{'term':{'publish_status':'0'}}]}}}, + 'raw_test': {'aggs': {'raw_test': {'terms':{'field': 'fields.raw','size':1000}}}, + 'filter':{'bool':{'must':[{'term':{'publish_status':'0'}}]}}} + }, 'post_filters': {'Data Language': 'language', 'Data Type': 'description.value'}}, } @@ -2246,77 +2452,6 @@ def test_get_facet_search_query(app,mocker): # .tox/c1/bin/pytest --cov=weko_admin tests/test_utils.py::test_get_title_facets -vv -s --cov-branch --cov-report=term --basetemp=/code/modules/weko-admin/.tox/c1/tmp def test_get_title_facets(app,facet_search_settings): with app.test_request_context(headers=[('Accept-Language', 'en')]): - titles, order = get_title_facets() - assert titles == {"Data Language":"Data Language","Data Type":"Data Type","raw_test":"raw_test"} - assert order == {1:"Data Language",3:"Data Type",4:"raw_test"} - - -# def is_exits_facet(data, id): -# .tox/c1/bin/pytest --cov=weko_admin tests/test_utils.py::test_is_exits_facet -vv -s --cov-branch --cov-report=term --basetemp=/code/modules/weko-admin/.tox/c1/tmp -def test_is_exits_facet(app, facet_search_settings): - with app.test_request_context(headers=[('Accept-Language', 'en')]): - # not id > 0 - result = is_exits_facet({"name_en":"Data Type","name_jp":"データタイプ","mapping":"description.value"},None) - assert result == True - result = is_exits_facet({"name_en":"not exist facet","name_jp":"存在しないファセット","mapping":"not exist mapping"},None) - assert result == False - - # id > 0 - result = is_exits_facet({"name_en":"Data Type","name_jp":"データタイプ","mapping":"description.value"},"3") - assert result == False - result = is_exits_facet({"name_en":"Data Type","name_jp":"データタイプ","mapping":"description.value"},"100") - assert result == True - -# def overwrite_the_memory_config_with_db(app, site_info): -# .tox/c1/bin/pytest --cov=weko_admin tests/test_utils.py::test_overwrite_the_memory_config_with_db -vv -s --cov-branch --cov-report=term --basetemp=/code/modules/weko-admin/.tox/c1/tmp -def test_overwrite_the_memory_config_with_db(app,client,site_info): - from flask import Flask - - site_info_not_google = SiteInfo( - site_name=[{"name":"test_site_info"}], - notify={"name":"test_notify"} - ) - - site_info_google1 = SiteInfo( - site_name=[{"name":"test_site_info"}], - notify={"name":"test_notify"}, - google_tracking_id_user="test_tracking_id1", - ) - site_info_google2 = SiteInfo( - site_name=[{"name":"test_site_info"}], - notify={"name":"test_notify"}, - google_tracking_id_user="test_tracking_id2", - ) - - app = Flask("test_weko_admin_app") - # site_info is None - overwrite_the_memory_config_with_db(app, None) - - # site_info.google_tracking_id_user is not exist - overwrite_the_memory_config_with_db(app, site_info_not_google) - - # GOOGLE_TRACKING_ID_USER is not exist - overwrite_the_memory_config_with_db(app, site_info_google1) - assert app.config["GOOGLE_TRACKING_ID_USER"] == "test_tracking_id1" - - overwrite_the_memory_config_with_db(app, site_info_google2) - assert app.config["GOOGLE_TRACKING_ID_USER"] == "test_tracking_id2" - -import json -import pytest -from flask import current_app, make_response, request, url_for -from flask_login import current_user -from mock import patch - -from weko_admin.utils import ( - get_title_facets -) - -# def get_title_facets(): -def test_get_title_facets(app, users, facet_search_settings): - #facet_search_setting = json_data("data/test_facet.json") - with app.test_request_context(headers=[('Accept-Language', 'en')]): - #with patch("weko_admin.models.FacetSearchSetting.get_activated_facets", return_value=facet_search_setting): titles, order, uiTypes, isOpens, displayNumbers, searchConditions = get_title_facets() assert uiTypes assert isOpens diff --git a/modules/weko-records/tests/test_utils.py b/modules/weko-records/tests/test_utils.py index f92e6db48e..c414011014 100644 --- a/modules/weko-records/tests/test_utils.py +++ b/modules/weko-records/tests/test_utils.py @@ -58,7 +58,9 @@ replace_fqdn, replace_fqdn_of_file_metadata, get_author_link, - set_file_date) + set_file_date, + check_embargo_rights, + update_embargo_rights) from weko_records.api import ItemTypes, Mapping from weko_records.models import ItemTypeName from weko_workflow.models import Activity @@ -122,7 +124,7 @@ def dumps(self): app.config['WEKO_SCHEMA_JPCOAR_V1_SCHEMA_NAME'] = 'jpcoar_v1_mapping' app.config['WEKO_SCHEMA_DDI_SCHEMA_NAME'] = 'ddi_mapping' dc, jrc, is_edit = json_loader(data3,_pid) - assert dc == OrderedDict([('item_1', {'attribute_name': 'item_1', 'attribute_value': 'item_1_v'}), ('item_2', {'attribute_name': 'item_2', 'attribute_value': ''}), ('item_3', {'attribute_name': 'item_3', 'attribute_type': 'creator', 'attribute_value_mlt': [{'item_3_1': 'item_3_1_v'}]}), ('item_4', {'attribute_name': 'item_4', 'attribute_value_mlt': [{'item_4_1': 'item_4_1_v'}]}), ('item_5', {'attribute_name': 'item_5', 'attribute_type': 'file', 'attribute_value_mlt': [{'filename': 'item_5'}]}), ('item_6', {'attribute_name': 'item_6', 'attribute_value_mlt': [{}]}), ('item_7', {'attribute_name': 'item_7', 'attribute_value_mlt': [{}, {'nameIdentifiers': [{'nameIdentifierScheme': 'WEKO', 'nameIdentifier': '1234'}]}]}), ('item_8', {'attribute_name': 'item_8', 'attribute_value_mlt': [{'nameIdentifiers': [{'nameIdentifierScheme': 'WEKO', 'nameIdentifier': '5678'}]}]}), ('item_title', 'test_item1'), ('item_type_id', '3'), ('control_number', '1'), + assert dc == OrderedDict([('item_1', {'attribute_name': 'item_1', 'attribute_value': 'item_1_v'}), ('item_2', {'attribute_name': 'item_2', 'attribute_value': ''}), ('item_3', {'attribute_name': 'item_3', 'attribute_type': 'creator', 'attribute_value_mlt': [{'item_3_1': 'item_3_1_v'}]}), ('item_4', {'attribute_name': 'item_4', 'attribute_value_mlt': [{'item_4_1': 'item_4_1_v'}]}), ('item_5', {'attribute_name': 'item_5', 'attribute_type': 'file', 'attribute_value_mlt': [{'filename': 'item_5'}]}), ('item_6', {'attribute_name': 'item_6', 'attribute_value_mlt': [{}]}), ('item_7', {'attribute_name': 'item_7', 'attribute_value_mlt': [{}, {'nameIdentifiers': [{'nameIdentifierScheme': 'WEKO', 'nameIdentifier': '1234'}]}]}), ('item_8', {'attribute_name': 'item_8', 'attribute_value_mlt': [{'nameIdentifiers': [{'nameIdentifierScheme': 'WEKO', 'nameIdentifier': '5678'}]}]}), ('item_title', 'test_item1'), ('item_type_id', '3'), ('control_number', '1'), ('author_link', ['1234', '5678']), ('weko_shared_ids', []), ('_oai', {'id': '1'}), ('owner', 1), ('owners', [1])]) assert jrc == {'item_4': ['item_4_1_v'], 'creator1': {'nameIdentifier': ['1234', '5678']}, 'item_5': ['item_5'], 'item_3': ['item_3_1_v'], 'control_number': '1', '_oai': {'id': '1'}, '_item_metadata': OrderedDict([('item_1', {'attribute_name': 'item_1', 'attribute_value': 'item_1_v'}), ('item_2', {'attribute_name': 'item_2', 'attribute_value': ''}), ('item_3', {'attribute_name': 'item_3', 'attribute_type': 'creator', 'attribute_value_mlt': [{'item_3_1': 'item_3_1_v'}]}), ('item_4', {'attribute_name': 'item_4', 'attribute_value_mlt': [{'item_4_1': 'item_4_1_v'}]}), ('item_5', {'attribute_name': 'item_5', 'attribute_type': 'file', 'attribute_value_mlt': [{'filename': 'item_5'}]}), ('item_6', {'attribute_name': 'item_6', 'attribute_value_mlt': [{}]}), ('item_7', {'attribute_name': 'item_7', 'attribute_value_mlt': [{}, {'nameIdentifiers': [{'nameIdentifierScheme': 'WEKO', 'nameIdentifier': '1234'}]}]}), ('item_8', {'attribute_name': 'item_8', 'attribute_value_mlt': [{'nameIdentifiers': [{'nameIdentifierScheme': 'WEKO', 'nameIdentifier': '5678'}]}]}), ('item_title', 'test_item1'), ('item_type_id', '3'), ('control_number', '1'), ('author_link', ['1234', '5678']), ('weko_shared_ids', []), ('_oai', {'id': '1'}), ('owner', 1), ('owners', [1])]), 'itemtype': 'test10', 'publish_date': None, @@ -175,18 +177,18 @@ class MockSM: dc, jrc, is_edit = json_loader(data5, _pid) assert dc == OrderedDict([('item_1', {'attribute_name': 'item_1', 'attribute_value': 'item_1_v'}), ('item_2', {'attribute_name': 'item_2', 'attribute_value': 'item_2_v'}), ('item_3', {'attribute_name': 'item_3', 'attribute_type': 'creator', 'attribute_value_mlt': [{'item_3_1': 'item_3_1_v'}]}), ('item_4', {'attribute_name': 'item_4', 'attribute_value_mlt': [{'item_4_1': 'item_4_1_v'}]}), ('item_5', {'attribute_name': 'item_5', 'attribute_type': 'file', 'attribute_value_mlt': [{'filename': 'item_5'}]}), ('item_6', {'attribute_name': 'item_6', 'attribute_value_mlt': [{'item_6_1': 'item_6_1_v'}]}), ('item_7', {'attribute_name': 'item_7', 'attribute_value_mlt': [{}, {'nameIdentifiers': [{'nameIdentifierScheme': 'WEKO', 'nameIdentifier': '1234'}]}]}), ('item_8', {'attribute_name': 'item_8', 'attribute_value_mlt': [{'nameIdentifiers': [{'nameIdentifierScheme': 'WEKO', 'nameIdentifier': '5678'}]}]}), ('item_title', 'test_item2'), ('item_type_id', '3'), ('control_number', '1'), ('author_link', ['1234', '5678']), ('weko_shared_ids', [2]), ('owner', 5), ('owners', [5])]) - assert jrc == {'item_5': ['item_5'], 'item_4': ['item_4_1_v'], 'creator1': {'nameIdentifier': ['1234', '5678']}, 'item_6': ['item_6_1_v'], 'item_3': ['item_3_1_v'], 'control_number': '1', '_oai': {'id': '1'}, '_item_metadata': - OrderedDict([('item_1', {'attribute_name': 'item_1', 'attribute_value': 'item_1_v'}), - ('item_2', {'attribute_name': 'item_2', 'attribute_value': 'item_2_v'}), + assert jrc == {'item_5': ['item_5'], 'item_4': ['item_4_1_v'], 'creator1': {'nameIdentifier': ['1234', '5678']}, 'item_6': ['item_6_1_v'], 'item_3': ['item_3_1_v'], 'control_number': '1', '_oai': {'id': '1'}, '_item_metadata': + OrderedDict([('item_1', {'attribute_name': 'item_1', 'attribute_value': 'item_1_v'}), + ('item_2', {'attribute_name': 'item_2', 'attribute_value': 'item_2_v'}), ('item_3', {'attribute_name': 'item_3', 'attribute_type': 'creator', 'attribute_value_mlt': [{'item_3_1': 'item_3_1_v'}]}), ('item_4', {'attribute_name': 'item_4', 'attribute_value_mlt': [{'item_4_1': 'item_4_1_v'}]}), ('item_5', {'attribute_name': 'item_5', 'attribute_type': 'file', 'attribute_value_mlt': [{'filename': 'item_5'}]}), ('item_6', {'attribute_name': 'item_6', 'attribute_value_mlt': [{'item_6_1': 'item_6_1_v'}]}), ('item_7', {'attribute_name': 'item_7', 'attribute_value_mlt': [{}, {'nameIdentifiers': [{'nameIdentifierScheme': 'WEKO', 'nameIdentifier': '1234'}]}]}), ('item_8', {'attribute_name': 'item_8', 'attribute_value_mlt': [{'nameIdentifiers': [{'nameIdentifierScheme': 'WEKO', 'nameIdentifier': '5678'}]}]}), - ('item_title', 'test_item2'), ('item_type_id', '3'), ('control_number', '1'), + ('item_title', 'test_item2'), ('item_type_id', '3'), ('control_number', '1'), ('author_link', ['1234', '5678']),('weko_shared_ids', [2]), ('owner', 5), ('owners', [5])]), - 'itemtype': 'test10', 'publish_date': None, + 'itemtype': 'test10', 'publish_date': None, 'author_link': ['1234', '5678'], 'weko_creator_id': '5', 'weko_shared_ids': [2]} assert is_edit == True @@ -2649,3 +2651,351 @@ def test_replace_fqdn_of_file_metadata(app): replace_fqdn_of_file_metadata(_file_metadata_list2) assert _file_metadata_list2==[{'url': {'url': 'https://localhost/a'}, 'version_id': '1'}, {'url': {'url': 'https://localhost/b'}, 'version_id': '1'}] +# .tox/c1/bin/pytest --cov=weko_records tests/test_utils.py::test_check_embargo_rights -v -s -vv --cov-branch --cov-report=term --cov-config=tox.ini --basetemp=/code/modules/weko-records/.tox/c1/tmp +def test_check_embargo_rights(): + # Do nothing except for 'embargoed access' + result = check_embargo_rights("open_access", datetime.date.today(), []) + assert result == (False, None) + + # If there is at least one 'open_restricted', return 'restricted access' + today = datetime.date.today() + accessrole_date = [("open_restricted", None), ("open_access", None)] + result = check_embargo_rights("embargoed access", today, accessrole_date) + assert result == (True, "restricted access") + + # If there is a future date in 'open_date', do nothing + today = datetime.date.today() + future = today + datetime.timedelta(days=1) + accessrole_date = [("open_date", future)] + result = check_embargo_rights("embargoed access", today, accessrole_date) + assert result == (False, None) + + # If there is at least one 'open_login', return 'restricted access' + today = datetime.date.today() + accessrole_date = [("open_login", None)] + result = check_embargo_rights("embargoed access", today, accessrole_date) + assert result == (True, "restricted access") + + # If all are 'open_access', return 'open access' + today = datetime.date.today() + accessrole_date = [("open_access", None), ("open_access", None)] + result = check_embargo_rights("embargoed access", today, accessrole_date) + assert result == (True, "open access") + + # If all are 'open_date' and the date is in the past, return 'open access' + today = datetime.date.today() + past = today - datetime.timedelta(days=1) + accessrole_date = [("open_date", past), ("open_date", past)] + result = check_embargo_rights("embargoed access", today, accessrole_date) + assert result == (True, "open access") + + # If 'open_access' and 'open_date' (past) are mixed, return 'open access' + today = datetime.date.today() + past = today - datetime.timedelta(days=1) + accessrole_date = [("open_access", None), ("open_date", past)] + result = check_embargo_rights("embargoed access", today, accessrole_date) + assert result == (True, "open access") + + # If accessrole_date is empty, do nothing + today = datetime.date.today() + accessrole_date = [] + result = check_embargo_rights("embargoed access", today, accessrole_date) + assert result == (False, None) + +from unittest.mock import MagicMock +import weko_records.utils as utils + +# .tox/c1/bin/pytest --cov=weko_records tests/test_utils.py::test_update_embargo_rights -v -s -vv --cov-branch --cov-report=term --cov-config=tox.ini --basetemp=/code/modules/weko-records/.tox/c1/tmp +def test_update_embargo_rights(app, monkeypatch): + base_meta = { + "item_type_id": "test", + "item_1736146823660": { + "attribute_name": "アクセス権", + "attribute_value_mlt": [ + { + "subitem_access_right": "embargoed access", + "subitem_access_right_uri": "http://purl.org/coar/access_right/c_f1cf" + } + ] + }, + "item_1736148125517": { + "attribute_name": "ファイル情報", + "attribute_type": "file", + "attribute_value_mlt": [ + { + "url": { + "url": "https://weko3.example.org/record/2000088/files/image141.png" + }, + "date": [ + { + "dateType": "Available", + "dateValue": "2026-03-31" + } + ], + "format": "image/png", + "filename": "image141.png", + "filesize": [ + { + "value": "11 KB" + } + ], + "accessrole": "open_date", + "version_id": "55995df6-6d1c-4bbf-8530-d1c0fb5a4219", + "subitem_access_right": "embargoed access" + } + ] + } + } + + # Embargo is released and access becomes open_access + cfg = True + meta = copy.deepcopy(base_meta) + access_path = "item_1736148125517.attribute_value_mlt.subitem_access_right" + monkeypatch.setattr(utils, "current_app", MagicMock(config={"WEKO_SEARCH_FIX_ACCESSRIGHTS": cfg})) + monkeypatch.setattr("weko_records.serializers.utils.get_mapping", lambda i, t: {"accessRights.@value": access_path}) + monkeypatch.setattr(utils, "check_embargo_rights", lambda a, t, d: (True, "open access")) + utils.update_embargo_rights(meta) + assert meta["item_1736148125517"]["attribute_value_mlt"][0]["subitem_access_right"] == "open access" + + # Embargo is released and access becomes restricted_access + meta = copy.deepcopy(base_meta) + monkeypatch.setattr(utils, "check_embargo_rights", lambda a, t, d: (True, "restricted access")) + utils.update_embargo_rights(meta) + assert meta["item_1736148125517"]["attribute_value_mlt"][0]["subitem_access_right"] == "restricted access" + + # Embargo is not released (returns None), access remains embargoed + meta = copy.deepcopy(base_meta) + monkeypatch.setattr(utils, "check_embargo_rights", lambda a, t, d: (False, None)) + utils.update_embargo_rights(meta) + assert meta["item_1736148125517"]["attribute_value_mlt"][0]["subitem_access_right"] == "embargoed access" + + # File information with accessrole=None is handled correctly + meta = copy.deepcopy(base_meta) + meta["item_1736148125517"]["attribute_value_mlt"][0]["accessrole"] = None + monkeypatch.setattr(utils, "check_embargo_rights", lambda a, t, d: (False, None)) + utils.update_embargo_rights(meta) + assert meta["item_1736148125517"]["attribute_value_mlt"][0]["subitem_access_right"] == "embargoed access" + + # Access rights information with None is handled correctly + meta = copy.deepcopy(base_meta) + meta["item_1736146823660"]["attribute_value_mlt"][0]["subitem_access_right"] = None + monkeypatch.setattr(utils, "check_embargo_rights", lambda a, t, d: (False, None)) + utils.update_embargo_rights(meta) + assert meta["item_1736148125517"]["attribute_value_mlt"][0]["subitem_access_right"] == "embargoed access" + + # Access rights information with empty list is handled correctly + meta = copy.deepcopy(base_meta) + meta["item_1736146823660"]["attribute_value_mlt"] = [] + monkeypatch.setattr(utils, "check_embargo_rights", lambda a, t, d: (False, None)) + utils.update_embargo_rights(meta) + assert meta["item_1736148125517"]["attribute_value_mlt"][0]["subitem_access_right"] == "embargoed access" + + # Empty meta dict does not change anything + meta = {} + utils.update_embargo_rights(meta) + assert "subitem_access_right" not in meta + + # No change if access_path does not exist in mapping + meta = copy.deepcopy(base_meta) + monkeypatch.setattr(utils, "current_app", MagicMock(config={"WEKO_SEARCH_FIX_ACCESSRIGHTS": True})) + monkeypatch.setattr("weko_records.serializers.utils.get_mapping", lambda i, t: {}) # accessRights.@valueなし + monkeypatch.setattr(utils, "check_embargo_rights", lambda a, t, d: (True, "open access")) + utils.update_embargo_rights(meta) + assert meta["item_1736148125517"]["attribute_value_mlt"][0]["subitem_access_right"] == "embargoed access" + + # No change if WEKO_SEARCH_FIX_ACCESSRIGHTS is False + meta = copy.deepcopy(base_meta) + monkeypatch.setattr(utils, "current_app", MagicMock(config={"WEKO_SEARCH_FIX_ACCESSRIGHTS": False})) + monkeypatch.setattr("weko_records.serializers.utils.get_mapping", lambda i, t: {"accessRights.@value": access_path}) + monkeypatch.setattr(utils, "check_embargo_rights", lambda a, t, d: (True, "open access")) + utils.update_embargo_rights(meta) + assert meta["item_1736148125517"]["attribute_value_mlt"][0]["subitem_access_right"] == "embargoed access" + + # Only the second element in attribute_value_mlt list is the target key + meta = { + "item_type_id": "test", + "item_1": { + "attribute_value_mlt": [ + {"dummy": 1}, + {"subitem_access_right": "embargoed access"} + ] + } + } + access_path = "item_1.attribute_value_mlt.subitem_access_right" + monkeypatch.setattr(utils, "current_app", MagicMock(config={"WEKO_SEARCH_FIX_ACCESSRIGHTS": True})) + monkeypatch.setattr("weko_records.serializers.utils.get_mapping", lambda i, t: {"accessRights.@value": access_path}) + monkeypatch.setattr(utils, "check_embargo_rights", lambda a, t, d: (True, "open access")) + utils.update_embargo_rights(meta) + assert meta["item_1"]["attribute_value_mlt"][1]["subitem_access_right"] == "open access" + + # Deep nesting in attribute_value_mlt is handled correctly + meta = { + "item_type_id": "test", + "item_1": { + "attribute_value_mlt": [ + {"attribute_value_mlt": [ + {"subitem_access_right": "embargoed access"} + ]} + ] + } + } + access_path = "item_1.attribute_value_mlt.attribute_value_mlt.subitem_access_right" + monkeypatch.setattr(utils, "current_app", MagicMock(config={"WEKO_SEARCH_FIX_ACCESSRIGHTS": True})) + monkeypatch.setattr("weko_records.serializers.utils.get_mapping", lambda i, t: {"accessRights.@value": access_path}) + monkeypatch.setattr(utils, "check_embargo_rights", lambda a, t, d: (False, None)) + utils.update_embargo_rights(meta) + assert meta["item_1"]["attribute_value_mlt"][0]["attribute_value_mlt"][0]["subitem_access_right"] == "embargoed access" + + # Tests recursive list branch in _set_nested_value; all nested rights are updated. + meta = { + "item_type_id": "test", + "item_1": { + "attribute_value_mlt": [ + {"attribute_value_mlt": [ + {"subitem_access_right": "embargoed access"}, + {"subitem_access_right": "embargoed access"} + ]}, + {"attribute_value_mlt": [ + {"subitem_access_right": "embargoed access"} + ]} + ] + } + } + access_path = "item_1.attribute_value_mlt.attribute_value_mlt.subitem_access_right" + monkeypatch.setattr(utils, "current_app", MagicMock(config={"WEKO_SEARCH_FIX_ACCESSRIGHTS": True})) + monkeypatch.setattr("weko_records.serializers.utils.get_mapping", lambda i, t: {"accessRights.@value": access_path}) + monkeypatch.setattr(utils, "check_embargo_rights", lambda a, t, d: (True, "open access")) + utils.update_embargo_rights(meta) + for item in meta["item_1"]["attribute_value_mlt"]: + for subitem in item["attribute_value_mlt"]: + assert subitem["subitem_access_right"] == "open access" + + # File information date (dateValue) is None + meta = { + "item_type_id": "test", + "item_1": { + "attribute_type": "file", + "attribute_value_mlt": [ + { + "date": [ + {"dateType": "Available", "dateValue": None} + ], + "accessrole": "open_date", + "subitem_access_right": "embargoed access" + } + ] + } + } + access_path = "item_1.attribute_value_mlt.subitem_access_right" + monkeypatch.setattr(utils, "current_app", MagicMock(config={"WEKO_SEARCH_FIX_ACCESSRIGHTS": True})) + monkeypatch.setattr("weko_records.serializers.utils.get_mapping", lambda i, t: {"accessRights.@value": access_path}) + monkeypatch.setattr(utils, "check_embargo_rights", lambda a, t, d: (False, None)) + utils.update_embargo_rights(meta) + assert meta["item_1"]["attribute_value_mlt"][0]["subitem_access_right"] == "embargoed access" + + # File information date (dateValue) is invalid format + meta = { + "item_type_id": "test", + "item_1": { + "attribute_type": "file", + "attribute_value_mlt": [ + { + "date": [ + {"dateType": "Available", "dateValue": "test_invalid"} + ], + "accessrole": "open_date", + "subitem_access_right": "embargoed access" + } + ] + } + } + access_path = "item_1.attribute_value_mlt.subitem_access_right" + monkeypatch.setattr(utils, "current_app", MagicMock(config={"WEKO_SEARCH_FIX_ACCESSRIGHTS": True})) + monkeypatch.setattr("weko_records.serializers.utils.get_mapping", lambda i, t: {"accessRights.@value": access_path}) + monkeypatch.setattr(utils, "check_embargo_rights", lambda a, t, d: (False, None)) + utils.update_embargo_rights(meta) + assert meta["item_1"]["attribute_value_mlt"][0]["subitem_access_right"] == "embargoed access" + + # Update value if mapping path points to a key inside attribute_value_mlt list element, even if not directly in the dict. + meta = { + "item_type_id": "test", + "item_1": { + "attribute_value_mlt": [ + {"subitem_access_right": "embargoed access"} + ] + } + } + access_path = "item_1.subitem_access_right" + monkeypatch.setattr(utils, "current_app", MagicMock(config={"WEKO_SEARCH_FIX_ACCESSRIGHTS": True})) + monkeypatch.setattr("weko_records.serializers.utils.get_mapping", lambda i, t: {"accessRights.@value": access_path}) + monkeypatch.setattr(utils, "check_embargo_rights", lambda a, t, d: (True, "open access")) + utils.update_embargo_rights(meta) + assert meta["item_1"]["attribute_value_mlt"][0]["subitem_access_right"] == "open access" + + # Test: Do nothing if mapping path points to a key that does not exist in any attribute_value_mlt list element. + meta = { + "item_type_id": "test", + "item_1": { + "attribute_value_mlt": [ + {"dummy": 1} + ] + } + } + access_path = "item_1.subitem_access_right" + monkeypatch.setattr(utils, "current_app", MagicMock(config={"WEKO_SEARCH_FIX_ACCESSRIGHTS": True})) + monkeypatch.setattr("weko_records.serializers.utils.get_mapping", lambda i, t: {"accessRights.@value": access_path}) + monkeypatch.setattr(utils, "check_embargo_rights", lambda a, t, d: (False, None)) + utils.update_embargo_rights(meta) + assert "subitem_access_right" not in meta["item_1"]["attribute_value_mlt"][0] + + # If mapping points to a non-dict/list (e.g. int), no error and value stays unchanged. + class DummyMapping(dict): + def get(self, key, default=None): + if key == "accessRights.@value": + return "item1.item2.item3" + return super().get(key, default) + def dummy_get_mapping(item_type_id, mapping_type): + return DummyMapping({"accessRights.@value": "item1.item2.item3"}) + monkeypatch.setattr("weko_records.serializers.utils.get_mapping", dummy_get_mapping) + with app.app_context(): + import flask + flask.current_app.config["WEKO_SEARCH_FIX_ACCESSRIGHTS"] = True + metadata = { + "item_type_id": 1, + "item1": {"item2": 12345} + } + update_embargo_rights(metadata) + assert metadata["item1"]["item2"] == 12345 + + # access_path points to a nested key inside a list + meta = { + "item_type_id": "test", + "item_1": { + "attribute_value_mlt": [ + {"subitem_access_right": "embargoed access"}, + {"subitem_access_right": "embargoed access"} + ] + } + } + access_path = "item_1.attribute_value_mlt.subitem_access_right" + monkeypatch.setattr(utils, "current_app", MagicMock(config={"WEKO_SEARCH_FIX_ACCESSRIGHTS": True})) + monkeypatch.setattr("weko_records.serializers.utils.get_mapping", lambda i, t: {"accessRights.@value": access_path}) + monkeypatch.setattr(utils, "check_embargo_rights", lambda a, t, d: (True, "open access")) + utils.update_embargo_rights(meta) + # Both elements in the list should be updated + assert meta["item_1"]["attribute_value_mlt"][0]["subitem_access_right"] == "open access" + assert meta["item_1"]["attribute_value_mlt"][1]["subitem_access_right"] == "open access" + + # When the list is empty + meta = { + "item_type_id": "test", + "item_1": { + "attribute_value_mlt": [] + } + } + access_path = "item_1.attribute_value_mlt.subitem_access_right" + monkeypatch.setattr(utils, "current_app", MagicMock(config={"WEKO_SEARCH_FIX_ACCESSRIGHTS": True})) + monkeypatch.setattr("weko_records.serializers.utils.get_mapping", lambda i, t: {"accessRights.@value": access_path}) + monkeypatch.setattr(utils, "check_embargo_rights", lambda a, t, d: (True, "open access")) + utils.update_embargo_rights(meta) + assert meta["item_1"]["attribute_value_mlt"] == [] diff --git a/modules/weko-records/weko_records/utils.py b/modules/weko-records/weko_records/utils.py index 3400b3adbc..9ef20e2839 100644 --- a/modules/weko-records/weko_records/utils.py +++ b/modules/weko-records/weko_records/utils.py @@ -2936,40 +2936,29 @@ def update_embargo_rights(metadata: dict) -> None: return def _get_nested_value(data, path): - """Recursively get value from nested dict/list by dot-separated path.""" + """Recursively get value from nested dict by dot-separated path.""" keys = path.split('.') - for key in keys: - if isinstance(data, dict): - if key in data: - data = data[key] - elif 'attribute_value_mlt' in data: - found = None - for item in data['attribute_value_mlt']: - found = _get_nested_value( - item, - '.'.join(keys[keys.index(key):]) - ) - if found is not None: - break - data = found - break - else: - data = None - break - elif isinstance(data, list): - found = None - for item in data: - found = _get_nested_value( - item, - '.'.join([key] + keys[keys.index(key)+1:]) - ) + if not path or not keys or keys == ['']: + return data + key = keys[0] + rest = '.'.join(keys[1:]) + if isinstance(data, dict): + if key in data: + return _get_nested_value(data[key], rest) + if 'attribute_value_mlt' in data: + for item in data['attribute_value_mlt']: + found = _get_nested_value(item, rest) if found is not None: - break - data = found - break - else: - return None - return data + return found + return None + elif isinstance(data, list): + for item in data: + found = _get_nested_value(item, path) + if found is not None: + return found + return None + else: + return None access_right_value = _get_nested_value(metadata, access_path) if not access_right_value: @@ -3000,46 +2989,6 @@ def _get_nested_value(data, path): date_val = None if accessrole_val: accessrole_date.append((accessrole_val, date_val)) - elif isinstance(v, list): - for data in v: - if ( - isinstance(data, dict) and - data.get("attribute_type") == "file" - ): - mlt = data.get("attribute_value_mlt", []) - if mlt: - for file_data in mlt: - date_val = None - accessrole_val = file_data.get("accessrole") - if ( - "date" in file_data and - isinstance(file_data["date"], list) and - file_data["date"] - ): - date_val = file_data["date"][0].get("dateValue") - if date_val: - if re.match(r"^\d{4}-\d{2}-\d{2}$", date_val): - date_val = datetime.strptime(date_val, "%Y-%m-%d").date() - else: - date_val = None - if accessrole_val: - accessrole_date.append((accessrole_val, date_val)) - else: - date_val = None - accessrole_val = data.get("accessrole") - if ( - "date" in data and - isinstance(data["date"], list) and - data["date"] - ): - date_val = data["date"][0].get("dateValue") - if date_val: - if re.match(r"^\d{4}-\d{2}-\d{2}$", date_val): - date_val = datetime.strptime(date_val, "%Y-%m-%d").date() - else: - date_val = None - if accessrole_val: - accessrole_date.append((accessrole_val, date_val)) from .utils import check_embargo_rights is_update, change_value = check_embargo_rights( @@ -3048,20 +2997,27 @@ def _get_nested_value(data, path): def _set_nested_value(data, path, value): keys = path.split('.') - if ( - keys[-1] == 'subitem_access_right' and - isinstance(data.get(keys[0]), dict) - ): - target = data[keys[0]] - target[keys[-1]] = value - mlt = target.get('attribute_value_mlt', []) - for item in mlt: - if 'subitem_access_right' in item: - item['subitem_access_right'] = value + key = keys[0] + rest = '.'.join(keys[1:]) + if len(keys) == 1: + if isinstance(data, dict): + data[key] = value + if 'attribute_value_mlt' in data: + for item in data['attribute_value_mlt']: + _set_nested_value(item, key, value) + elif isinstance(data, list): + for item in data: + _set_nested_value(item, key, value) else: - for key in keys[:-1]: - data = data.setdefault(key, {}) - data[keys[-1]] = value + if isinstance(data, dict): + if key in data: + _set_nested_value(data[key], rest, value) + if 'attribute_value_mlt' in data: + for item in data['attribute_value_mlt']: + _set_nested_value(item, rest, value) + elif isinstance(data, list): + for item in data: + _set_nested_value(item, path, value) if is_update and change_value: _set_nested_value(metadata, access_path, change_value) diff --git a/modules/weko-search-ui/tests/test_query.py b/modules/weko-search-ui/tests/test_query.py index 34c8e11d1f..1ecf8a343a 100644 --- a/modules/weko-search-ui/tests/test_query.py +++ b/modules/weko-search-ui/tests/test_query.py @@ -1,5 +1,9 @@ import json import copy + +import pytest +from datetime import datetime, timezone +from flask import current_app, request from elasticsearch_dsl.query import Match, Range, Terms, Bool from mock import patch, MagicMock from werkzeug.datastructures import MultiDict @@ -947,3 +951,52 @@ def test_split_text_by_or(): assert _split_text_by_or("AAA OR BBB | CCC") == ["AAA", "BBB", "CCC"] assert _split_text_by_or("AAA OR OR BBB") == ["AAA", "", "BBB"] assert _split_text_by_or("OR AAA |") == ["OR AAA |"] + +@pytest.mark.parametrize("fix_accessrights, accessrights, expected_should, expected_key", [ + (True, "", False, None), # empty + (True, "open access", False, "open access"), # single value + (True, "embargoed access", False, "embargoed access"), + (True, "restricted access", False, "restricted access"), + (True, "metadata only access", False, "metadata only access"), + (True, "open access,embargoed access", True, "open access"), # multiple values + (True, "invalid access", False, None), # invalid value + (False, "open access", False, None), # config disabled +]) +# .tox/c1/bin/pytest --cov=weko_search_ui tests/test_query.py::test_accessrights_query_param -vv -s --cov-branch --cov-report=xml --basetemp=/code/modules/weko-search-ui/.tox/c1/tmp +def test_accessrights_query_param(app, users, fix_accessrights, accessrights, expected_should, expected_key, mocker): + ACCESS_RIGHTS_CHOICES = [ + 'embargoed access', + 'metadata only access', + 'open access', + 'restricted access', + ] + _data = { + 'search_type': '0', + 'accessrights': accessrights, + 'sort': 'controlnumber', + 'page': '1', + 'size': '20', + } + with app.test_client() as client: + login_user_via_session(client, email=users[3]["email"]) + search = RecordsSearch() + + with app.test_request_context(headers=[('Accept-Language','en')], data=_data): + app.config['WEKO_SEARCH_KEYWORDS_DICT'] = WEKO_SEARCH_KEYWORDS_DICT + app.config['WEKO_ADMIN_MANAGEMENT_OPTIONS'] = WEKO_ADMIN_MANAGEMENT_OPTIONS + app.config['WEKO_SEARCH_FIX_ACCESSRIGHTS'] = fix_accessrights + app.config['WEKO_ACCESS_RIGHTS_CHOICES'] = ACCESS_RIGHTS_CHOICES + mocker.patch("weko_search_ui.query.search_permission",side_effect=MockSearchPerm) + mocker.patch("weko_search_ui.permissions.search_permission",side_effect=MockSearchPerm) + app.extensions['invenio-oauth2server'] = 1 + app.extensions['invenio-queues'] = 1 + mock_searchperm = MagicMock(side_effect=MockSearchPerm) + search_result, urlkwargs = default_search_factory(self=None, search=search) + query = search_result.query().to_dict() + must_result = query["query"]["bool"]["filter"][0]["bool"]["must"] + if expected_key: + assert "accessRights" in str(must_result) + assert "accessRights" in str(urlkwargs) + else: + assert "accessRights" not in str(must_result) + assert "accessRights" not in str(urlkwargs) diff --git a/modules/weko-search-ui/tests/test_utils.py b/modules/weko-search-ui/tests/test_utils.py index d8209889c5..1018cbd81d 100644 --- a/modules/weko-search-ui/tests/test_utils.py +++ b/modules/weko-search-ui/tests/test_utils.py @@ -44,7 +44,6 @@ from weko_workflow.headless.activity import HeadlessActivity from weko_workflow.models import Activity, WorkFlow - from weko_search_ui.config import ( ACCESS_RIGHT_TYPE_URI, RESOURCE_TYPE_URI, @@ -76,6 +75,7 @@ delete_records, execute_search_with_pagination, export_all, + fix_aggregations_accessrights, get_retry_info, generate_metadata_from_jpcoar, get_change_identifier_mode_content, @@ -6216,3 +6216,69 @@ def test_none_scheme(self, mock_settings, app, db): handle_check_authors_affiliation(list_record) assert "errors" not in list_record[0] + +# .tox/c1/bin/pytest --cov=weko_search_ui tests/test_utils.py::test_fix_aggregations_accessrights -v -vv -s --cov-branch --cov-report=term --basetemp=/code/modules/weko-search-ui/.tox/c1/tmp +def test_fix_aggregations_accessrights(app, mocker): + # 1. When the flag is OFF, the aggregation result is returned as is. + with app.app_context(): + app.config["WEKO_SEARCH_FIX_ACCESSRIGHTS"] = False + app.config["WEKO_ACCESS_RIGHTS_CHOICES"] = ["embargoed access", "metadata only access", "open access", "restricted access"] + mocker.patch("weko_search_ui.utils.FacetSearchSetting.get_activated_facets_mapping", return_value={"access": "accessRights"}) + aggs = {"access": {"buckets": []}} + data = {"aggregations": aggs.copy()} + result = fix_aggregations_accessrights(data.copy()) + assert result["aggregations"] == aggs # No change when the flag is OFF + + # 2. If the mapping is not accessRights, new_accessRights remains. + with app.app_context(): + app.config["WEKO_SEARCH_FIX_ACCESSRIGHTS"] = True + app.config["WEKO_ACCESS_RIGHTS_CHOICES"] = ["embargoed access", "metadata only access", "open access", "restricted access"] + mocker.patch("weko_search_ui.utils.FacetSearchSetting.get_activated_facets_mapping", return_value={"other": "notAccessRights"}) + aggs = {"access": {"buckets": []}, "new_accessRights": {"buckets": {}}} + data = {"aggregations": aggs.copy()} + result = fix_aggregations_accessrights(data.copy()) + assert "new_accessRights" in result["aggregations"] # new_accessRights remains if mapping does not match + assert result["aggregations"]["access"] == {"buckets": []} + + # 3. If new_accessRights does not exist, return as is. + with app.app_context(): + app.config["WEKO_SEARCH_FIX_ACCESSRIGHTS"] = True + app.config["WEKO_ACCESS_RIGHTS_CHOICES"] = ["embargoed access", "metadata only access", "open access", "restricted access"] + mocker.patch("weko_search_ui.utils.FacetSearchSetting.get_activated_facets_mapping", return_value={"access": "accessRights"}) + aggs = {"access": {"buckets": []}} + data = {"aggregations": aggs.copy()} + result = fix_aggregations_accessrights(data.copy()) + assert result["aggregations"] == aggs # No change if new_accessRights does not exist + + # 4. If ACCESS_RIGHTS_CHOICES are not in new_accessRights buckets, new_accessRights remains. + with app.app_context(): + app.config["WEKO_SEARCH_FIX_ACCESSRIGHTS"] = True + app.config["WEKO_ACCESS_RIGHTS_CHOICES"] = ["embargoed access", "metadata only access", "open access", "restricted access"] + mocker.patch("weko_search_ui.utils.FacetSearchSetting.get_activated_facets_mapping", return_value={"access": "accessRights"}) + aggs = {"access": {"buckets": []}, "new_accessRights": {"buckets": {}}} + data = {"aggregations": aggs.copy()} + result = fix_aggregations_accessrights(data.copy()) + assert "new_accessRights" in result["aggregations"] # new_accessRights remains if ACCESS_RIGHTS_CHOICES not in buckets + assert result["aggregations"]["access"] == {"buckets": []} + + # 5. If doc_count is 0, new_accessRights remains. + with app.app_context(): + app.config["WEKO_SEARCH_FIX_ACCESSRIGHTS"] = True + app.config["WEKO_ACCESS_RIGHTS_CHOICES"] = ["embargoed access", "metadata only access", "open access", "restricted access"] + mocker.patch("weko_search_ui.utils.FacetSearchSetting.get_activated_facets_mapping", return_value={"access": "accessRights"}) + aggs = {"access": {"buckets": []}, "new_accessRights": {"buckets": {"open access": {"doc_count": 0}}}} + data = {"aggregations": aggs.copy()} + result = fix_aggregations_accessrights(data.copy()) + assert "new_accessRights" in result["aggregations"] # new_accessRights remains if doc_count is 0 + assert result["aggregations"]["access"] == {"buckets": []} + + # 6. Normal case: If doc_count > 0, accessrights are set in buckets and new_accessRights is removed. + with app.app_context(): + app.config["WEKO_SEARCH_FIX_ACCESSRIGHTS"] = True + app.config["WEKO_ACCESS_RIGHTS_CHOICES"] = ["embargoed access", "metadata only access", "open access", "restricted access"] + mocker.patch("weko_search_ui.utils.FacetSearchSetting.get_activated_facets_mapping", return_value={"access": "accessRights"}) + aggs = {"access": {"buckets": []}, "new_accessRights": {"buckets": {"open access": {"doc_count": 2}, "embargoed access": {"doc_count": 1}}}} + data = {"aggregations": aggs.copy()} + result = fix_aggregations_accessrights(data.copy()) + assert "new_accessRights" not in result["aggregations"] # new_accessRights is removed in normal case + assert result["aggregations"]["access"] == {"buckets": [{"key": "embargoed access", "doc_count": 1}, {"key": "open access", "doc_count": 2}]} diff --git a/modules/weko-search-ui/weko_search_ui/query.py b/modules/weko-search-ui/weko_search_ui/query.py index 1cb5ad011c..ce663cb830 100644 --- a/modules/weko-search-ui/weko_search_ui/query.py +++ b/modules/weko-search-ui/weko_search_ui/query.py @@ -636,7 +636,7 @@ def __get_accessrights_query(params): if not accessrights_list: return None - now = datetime.now(timezone.utc).isoformat() + now = datetime.now().isoformat() def open_access_query(now): """Query for open access.""" @@ -718,8 +718,6 @@ def metadata_only_query(): queries.append(metadata_only_query()) queries = [q for q in queries if q is not None] - if not queries: - return None if len(queries) == 1: return queries[0] return Q('bool', should=queries, minimum_should_match=1) diff --git a/modules/weko-search-ui/weko_search_ui/utils.py b/modules/weko-search-ui/weko_search_ui/utils.py index 7bef7d1bb4..6e021b6da9 100644 --- a/modules/weko-search-ui/weko_search_ui/utils.py +++ b/modules/weko-search-ui/weko_search_ui/utils.py @@ -6198,11 +6198,10 @@ def fix_aggregations_accessrights(data): if doc_count == 0: continue buckets.append({"key": right, "doc_count": doc_count}) - # Update buckets for each accessRights facet - for key in accessrights_keys: - if key in aggs: - aggs[key]["buckets"] = buckets - # Remove temporary aggregation - aggs.pop("new_accessRights", None) + if buckets: + for key in accessrights_keys: + if key in aggs: + aggs[key]["buckets"] = buckets + aggs.pop("new_accessRights", None) data['aggregations'] = aggs return data From e72444d95ca7506fc48e451b4a189d91660ad0de Mon Sep 17 00:00:00 2001 From: "kenji.shiokawa" Date: Tue, 17 Mar 2026 15:14:37 +0900 Subject: [PATCH 06/28] fix update_embargo_rights --- modules/weko-records/weko_records/utils.py | 43 ++++++++-------------- 1 file changed, 15 insertions(+), 28 deletions(-) diff --git a/modules/weko-records/weko_records/utils.py b/modules/weko-records/weko_records/utils.py index 9ef20e2839..742ba545c5 100644 --- a/modules/weko-records/weko_records/utils.py +++ b/modules/weko-records/weko_records/utils.py @@ -2936,29 +2936,22 @@ def update_embargo_rights(metadata: dict) -> None: return def _get_nested_value(data, path): - """Recursively get value from nested dict by dot-separated path.""" keys = path.split('.') - if not path or not keys or keys == ['']: - return data - key = keys[0] - rest = '.'.join(keys[1:]) - if isinstance(data, dict): - if key in data: - return _get_nested_value(data[key], rest) - if 'attribute_value_mlt' in data: - for item in data['attribute_value_mlt']: - found = _get_nested_value(item, rest) - if found is not None: - return found - return None - elif isinstance(data, list): - for item in data: - found = _get_nested_value(item, path) - if found is not None: - return found - return None - else: - return None + for idx, key in enumerate(keys): + if isinstance(data, dict): + if key in data: + data = data[key] + elif 'attribute_value_mlt' in data: + for item in data['attribute_value_mlt']: + found = _get_nested_value(item, '.'.join(keys[idx:])) + if found is not None: + return found + return None + else: + return None + else: + return None + return data access_right_value = _get_nested_value(metadata, access_path) if not access_right_value: @@ -3005,9 +2998,6 @@ def _set_nested_value(data, path, value): if 'attribute_value_mlt' in data: for item in data['attribute_value_mlt']: _set_nested_value(item, key, value) - elif isinstance(data, list): - for item in data: - _set_nested_value(item, key, value) else: if isinstance(data, dict): if key in data: @@ -3015,9 +3005,6 @@ def _set_nested_value(data, path, value): if 'attribute_value_mlt' in data: for item in data['attribute_value_mlt']: _set_nested_value(item, rest, value) - elif isinstance(data, list): - for item in data: - _set_nested_value(item, path, value) if is_update and change_value: _set_nested_value(metadata, access_path, change_value) From 6c87719ede9fb6c5f00199ac6deac109e31aa241 Mon Sep 17 00:00:00 2001 From: "kenji.shiokawa" Date: Thu, 19 Mar 2026 22:01:40 +0900 Subject: [PATCH 07/28] refix update_embargo_rights --- modules/weko-records/tests/test_utils.py | 27 +++++-------- modules/weko-records/weko_records/utils.py | 47 ++++++++++++++-------- 2 files changed, 40 insertions(+), 34 deletions(-) diff --git a/modules/weko-records/tests/test_utils.py b/modules/weko-records/tests/test_utils.py index c414011014..ba43dee0fb 100644 --- a/modules/weko-records/tests/test_utils.py +++ b/modules/weko-records/tests/test_utils.py @@ -2651,6 +2651,7 @@ def test_replace_fqdn_of_file_metadata(app): replace_fqdn_of_file_metadata(_file_metadata_list2) assert _file_metadata_list2==[{'url': {'url': 'https://localhost/a'}, 'version_id': '1'}, {'url': {'url': 'https://localhost/b'}, 'version_id': '1'}] +import datetime # .tox/c1/bin/pytest --cov=weko_records tests/test_utils.py::test_check_embargo_rights -v -s -vv --cov-branch --cov-report=term --cov-config=tox.ini --basetemp=/code/modules/weko-records/.tox/c1/tmp def test_check_embargo_rights(): # Do nothing except for 'embargoed access' @@ -2741,7 +2742,6 @@ def test_update_embargo_rights(app, monkeypatch): ], "accessrole": "open_date", "version_id": "55995df6-6d1c-4bbf-8530-d1c0fb5a4219", - "subitem_access_right": "embargoed access" } ] } @@ -2750,45 +2750,38 @@ def test_update_embargo_rights(app, monkeypatch): # Embargo is released and access becomes open_access cfg = True meta = copy.deepcopy(base_meta) - access_path = "item_1736148125517.attribute_value_mlt.subitem_access_right" + access_path = "item_1736146823660.attribute_value_mlt.subitem_access_right" monkeypatch.setattr(utils, "current_app", MagicMock(config={"WEKO_SEARCH_FIX_ACCESSRIGHTS": cfg})) monkeypatch.setattr("weko_records.serializers.utils.get_mapping", lambda i, t: {"accessRights.@value": access_path}) monkeypatch.setattr(utils, "check_embargo_rights", lambda a, t, d: (True, "open access")) utils.update_embargo_rights(meta) - assert meta["item_1736148125517"]["attribute_value_mlt"][0]["subitem_access_right"] == "open access" + assert meta["item_1736146823660"]["attribute_value_mlt"][0]["subitem_access_right"] == "open access" # Embargo is released and access becomes restricted_access meta = copy.deepcopy(base_meta) monkeypatch.setattr(utils, "check_embargo_rights", lambda a, t, d: (True, "restricted access")) utils.update_embargo_rights(meta) - assert meta["item_1736148125517"]["attribute_value_mlt"][0]["subitem_access_right"] == "restricted access" + assert meta["item_1736146823660"]["attribute_value_mlt"][0]["subitem_access_right"] == "restricted access" # Embargo is not released (returns None), access remains embargoed meta = copy.deepcopy(base_meta) monkeypatch.setattr(utils, "check_embargo_rights", lambda a, t, d: (False, None)) utils.update_embargo_rights(meta) - assert meta["item_1736148125517"]["attribute_value_mlt"][0]["subitem_access_right"] == "embargoed access" + assert meta["item_1736146823660"]["attribute_value_mlt"][0]["subitem_access_right"] == "embargoed access" # File information with accessrole=None is handled correctly meta = copy.deepcopy(base_meta) - meta["item_1736148125517"]["attribute_value_mlt"][0]["accessrole"] = None + meta["item_1736146823660"]["attribute_value_mlt"][0]["accessrole"] = None monkeypatch.setattr(utils, "check_embargo_rights", lambda a, t, d: (False, None)) utils.update_embargo_rights(meta) - assert meta["item_1736148125517"]["attribute_value_mlt"][0]["subitem_access_right"] == "embargoed access" + assert meta["item_1736146823660"]["attribute_value_mlt"][0]["subitem_access_right"] == "embargoed access" # Access rights information with None is handled correctly meta = copy.deepcopy(base_meta) meta["item_1736146823660"]["attribute_value_mlt"][0]["subitem_access_right"] = None monkeypatch.setattr(utils, "check_embargo_rights", lambda a, t, d: (False, None)) utils.update_embargo_rights(meta) - assert meta["item_1736148125517"]["attribute_value_mlt"][0]["subitem_access_right"] == "embargoed access" - - # Access rights information with empty list is handled correctly - meta = copy.deepcopy(base_meta) - meta["item_1736146823660"]["attribute_value_mlt"] = [] - monkeypatch.setattr(utils, "check_embargo_rights", lambda a, t, d: (False, None)) - utils.update_embargo_rights(meta) - assert meta["item_1736148125517"]["attribute_value_mlt"][0]["subitem_access_right"] == "embargoed access" + assert meta["item_1736146823660"]["attribute_value_mlt"][0]["subitem_access_right"] == None # Empty meta dict does not change anything meta = {} @@ -2801,7 +2794,7 @@ def test_update_embargo_rights(app, monkeypatch): monkeypatch.setattr("weko_records.serializers.utils.get_mapping", lambda i, t: {}) # accessRights.@valueなし monkeypatch.setattr(utils, "check_embargo_rights", lambda a, t, d: (True, "open access")) utils.update_embargo_rights(meta) - assert meta["item_1736148125517"]["attribute_value_mlt"][0]["subitem_access_right"] == "embargoed access" + assert meta["item_1736146823660"]["attribute_value_mlt"][0]["subitem_access_right"] == "embargoed access" # No change if WEKO_SEARCH_FIX_ACCESSRIGHTS is False meta = copy.deepcopy(base_meta) @@ -2809,7 +2802,7 @@ def test_update_embargo_rights(app, monkeypatch): monkeypatch.setattr("weko_records.serializers.utils.get_mapping", lambda i, t: {"accessRights.@value": access_path}) monkeypatch.setattr(utils, "check_embargo_rights", lambda a, t, d: (True, "open access")) utils.update_embargo_rights(meta) - assert meta["item_1736148125517"]["attribute_value_mlt"][0]["subitem_access_right"] == "embargoed access" + assert meta["item_1736146823660"]["attribute_value_mlt"][0]["subitem_access_right"] == "embargoed access" # Only the second element in attribute_value_mlt list is the target key meta = { diff --git a/modules/weko-records/weko_records/utils.py b/modules/weko-records/weko_records/utils.py index 742ba545c5..d7469b5cb8 100644 --- a/modules/weko-records/weko_records/utils.py +++ b/modules/weko-records/weko_records/utils.py @@ -2937,21 +2937,30 @@ def update_embargo_rights(metadata: dict) -> None: def _get_nested_value(data, path): keys = path.split('.') - for idx, key in enumerate(keys): - if isinstance(data, dict): - if key in data: - data = data[key] - elif 'attribute_value_mlt' in data: - for item in data['attribute_value_mlt']: - found = _get_nested_value(item, '.'.join(keys[idx:])) - if found is not None: - return found - return None + key = keys[0] + rest = '.'.join(keys[1:]) + if isinstance(data, dict): + if key in data: + if rest: + return _get_nested_value(data[key], rest) else: - return None + return data[key] + elif 'attribute_value_mlt' in data: + for item in data['attribute_value_mlt']: + found = _get_nested_value(item, '.'.join(keys)) + if found is not None: + return found + return None else: return None - return data + elif isinstance(data, list): + for item in data: + found = _get_nested_value(item, '.'.join(keys)) + if found is not None: + return found + return None + else: + return None access_right_value = _get_nested_value(metadata, access_path) if not access_right_value: @@ -2992,12 +3001,16 @@ def _set_nested_value(data, path, value): keys = path.split('.') key = keys[0] rest = '.'.join(keys[1:]) + if isinstance(data, list): + for item in data: + _set_nested_value(item, path, value) + return if len(keys) == 1: - if isinstance(data, dict): + if isinstance(data, dict) and key in data: data[key] = value - if 'attribute_value_mlt' in data: - for item in data['attribute_value_mlt']: - _set_nested_value(item, key, value) + if isinstance(data, dict) and 'attribute_value_mlt' in data: + for item in data['attribute_value_mlt']: + _set_nested_value(item, key, value) else: if isinstance(data, dict): if key in data: @@ -3007,4 +3020,4 @@ def _set_nested_value(data, path, value): _set_nested_value(item, rest, value) if is_update and change_value: - _set_nested_value(metadata, access_path, change_value) + _set_nested_value(metadata, access_path, change_value) \ No newline at end of file From 3fd1f6c408ebdf33ab4fb6e0e468c917951ead20 Mon Sep 17 00:00:00 2001 From: "kenji.shiokawa" Date: Wed, 25 Mar 2026 15:52:35 +0900 Subject: [PATCH 08/28] fix accessrights query --- modules/weko-admin/weko_admin/utils.py | 66 +++++++++++++++---- .../weko-search-ui/weko_search_ui/query.py | 15 +++-- 2 files changed, 63 insertions(+), 18 deletions(-) diff --git a/modules/weko-admin/weko_admin/utils.py b/modules/weko-admin/weko_admin/utils.py index ff367ea102..1195e200bb 100755 --- a/modules/weko-admin/weko_admin/utils.py +++ b/modules/weko-admin/weko_admin/utils.py @@ -2234,16 +2234,29 @@ def create_aggregations(facets): "must": [ {"term": {"accessRights": "embargoed access"}}, { - "nested": { - "path": "content", - "query": { - "bool": { - "must": [ - {"term": {"content.accessrole.raw": "open_date"}}, - {"range": {"content.date.dateValue.raw": {"lte": "@date"}}} - ] + "bool": { + "must_not": [ + { + "nested": { + "path": "content", + "query": { + "bool": { + "must_not": [ + {"term": {"content.accessrole.raw": "open_access"}}, + { + "bool": { + "must": [ + {"term": {"content.accessrole.raw": "open_date"}}, + {"range": {"content.date.dateValue.raw": {"lte": "@date"}}} + ] + } + } + ] + } + } + } } - } + ] } } ] @@ -2269,8 +2282,16 @@ def create_aggregations(facets): {"range": {"content.date.dateValue.raw": {"gt": "@date"}}} ] } + }, + { + "bool": { + "must": [ + {"term": {"content.accessrole.raw": "open_no"}} + ] + } } - ] + ], + "minimum_should_match": 1 } } } @@ -2280,9 +2301,13 @@ def create_aggregations(facets): { "nested": { "path": "content", - "query": { - "term": {"content.accessrole.raw": "open_restricted"} - } + "query": {"term": {"content.accessrole.raw": "open_restricted"}} + } + }, + { + "nested": { + "path": "content", + "query": {"term": {"content.accessrole.raw": "open_login"}} } } ] @@ -2325,6 +2350,21 @@ def create_aggregations(facets): } ] } + }, + { + "bool": { + "must": [ + {"term": {"accessRights": "embargoed access"}}, + { + "nested": { + "path": "content", + "query": { + "term": {"content.accessrole.raw": "open_restricted"} + } + } + } + ] + } } ] } diff --git a/modules/weko-search-ui/weko_search_ui/query.py b/modules/weko-search-ui/weko_search_ui/query.py index ce663cb830..eae250e1de 100644 --- a/modules/weko-search-ui/weko_search_ui/query.py +++ b/modules/weko-search-ui/weko_search_ui/query.py @@ -669,14 +669,15 @@ def embargoed_access_query(now): Q('bool', must=[ Q('term', **{'content.accessrole.raw': 'open_date'}), Q('range', **{'content.date.dateValue.raw': {'gt': now}}) + ]), + Q('bool', must=[ + Q('term', **{'content.accessrole.raw': 'open_no'}) ]) - ])) + ], minimum_should_match=1)) ], must_not=[ - Q( - 'nested', path='content', - query=Q('term', **{'content.accessrole.raw': 'open_restricted'}) - ) + Q('nested', path='content', query=Q('term', **{'content.accessrole.raw': 'open_restricted'})), + Q('nested', path='content', query=Q('term', **{'content.accessrole.raw': 'open_login'})) ] ) @@ -698,6 +699,10 @@ def restricted_access_query(now): Q('range', **{'content.date.dateValue.raw': {'gt': now}}) ])) ]) + ]), + Q('bool', must=[ + Q('term', accessRights='embargoed access'), + Q('nested', path='content', query=Q('term', **{'content.accessrole.raw': 'open_restricted'})) ]) ] ) From cfe0f75f9608b8a3d0ba039063e8eb8acd0f8aa0 Mon Sep 17 00:00:00 2001 From: "kenji.shiokawa" Date: Wed, 25 Mar 2026 19:50:17 +0900 Subject: [PATCH 09/28] fix embargoed access query --- modules/weko-admin/weko_admin/utils.py | 46 ++++++++++--------- .../weko-search-ui/weko_search_ui/query.py | 13 +++--- 2 files changed, 31 insertions(+), 28 deletions(-) diff --git a/modules/weko-admin/weko_admin/utils.py b/modules/weko-admin/weko_admin/utils.py index 1195e200bb..85ae80e92a 100755 --- a/modules/weko-admin/weko_admin/utils.py +++ b/modules/weko-admin/weko_admin/utils.py @@ -2270,30 +2270,38 @@ def create_aggregations(facets): "must": [ {"term": {"accessRights": "embargoed access"}}, { - "nested": { - "path": "content", - "query": { - "bool": { - "should": [ - { + "bool": { + "should": [ + { + "nested": { + "path": "content", + "query": { "bool": { "must": [ {"term": {"content.accessrole.raw": "open_date"}}, {"range": {"content.date.dateValue.raw": {"gt": "@date"}}} ] } - }, - { - "bool": { - "must": [ - {"term": {"content.accessrole.raw": "open_no"}} - ] - } } - ], - "minimum_should_match": 1 + } + }, + { + "bool": { + "must": [ + {"nested": { + "path": "content", + "query": {"term": {"content.accessrole.raw": "open_no"}} + }} + ], + "must_not": [ + {"nested": { + "path": "content", + "query": {"term": {"content.accessrole.raw": "open_login"}} + }} + ] + } } - } + ] } } ], @@ -2303,12 +2311,6 @@ def create_aggregations(facets): "path": "content", "query": {"term": {"content.accessrole.raw": "open_restricted"}} } - }, - { - "nested": { - "path": "content", - "query": {"term": {"content.accessrole.raw": "open_login"}} - } } ] } diff --git a/modules/weko-search-ui/weko_search_ui/query.py b/modules/weko-search-ui/weko_search_ui/query.py index eae250e1de..e90ff22d08 100644 --- a/modules/weko-search-ui/weko_search_ui/query.py +++ b/modules/weko-search-ui/weko_search_ui/query.py @@ -665,19 +665,20 @@ def embargoed_access_query(now): 'bool', must=[ Q('term', accessRights='embargoed access'), - Q('nested', path='content', query=Q('bool', should=[ - Q('bool', must=[ + Q('bool', should=[ + Q('nested', path='content', query=Q('bool', must=[ Q('term', **{'content.accessrole.raw': 'open_date'}), Q('range', **{'content.date.dateValue.raw': {'gt': now}}) - ]), + ])), Q('bool', must=[ - Q('term', **{'content.accessrole.raw': 'open_no'}) + Q('nested', path='content', query=Q('term', **{'content.accessrole.raw': 'open_no'})) + ], must_not=[ + Q('nested', path='content', query=Q('term', **{'content.accessrole.raw': 'open_login'})) ]) - ], minimum_should_match=1)) + ]) ], must_not=[ Q('nested', path='content', query=Q('term', **{'content.accessrole.raw': 'open_restricted'})), - Q('nested', path='content', query=Q('term', **{'content.accessrole.raw': 'open_login'})) ] ) From dff2c7fa6074066c5c4562b6b786a1d04682b50d Mon Sep 17 00:00:00 2001 From: "kenji.shiokawa" Date: Thu, 26 Mar 2026 14:25:56 +0900 Subject: [PATCH 10/28] refix accessRights query --- modules/weko-admin/weko_admin/utils.py | 11 +++++++++++ modules/weko-search-ui/weko_search_ui/query.py | 4 ++++ 2 files changed, 15 insertions(+) diff --git a/modules/weko-admin/weko_admin/utils.py b/modules/weko-admin/weko_admin/utils.py index 85ae80e92a..838f1a3fc2 100755 --- a/modules/weko-admin/weko_admin/utils.py +++ b/modules/weko-admin/weko_admin/utils.py @@ -2233,6 +2233,7 @@ def create_aggregations(facets): "bool": { "must": [ {"term": {"accessRights": "embargoed access"}}, + {"nested": {"path": "content", "query": {"exists": {"field": "content.accessrole.raw"}}}}, { "bool": { "must_not": [ @@ -2300,6 +2301,16 @@ def create_aggregations(facets): }} ] } + }, + { + "bool": { + "must_not": [ + {"nested": { + "path": "content", + "query": {"exists": {"field": "content.accessrole.raw"}} + }} + ] + } } ] } diff --git a/modules/weko-search-ui/weko_search_ui/query.py b/modules/weko-search-ui/weko_search_ui/query.py index e90ff22d08..3280bedfc3 100644 --- a/modules/weko-search-ui/weko_search_ui/query.py +++ b/modules/weko-search-ui/weko_search_ui/query.py @@ -646,6 +646,7 @@ def open_access_query(now): Q('term', accessRights='open access'), Q('bool', must=[ Q('term', accessRights='embargoed access'), + Q('nested', path='content', query=Q('exists', field='content.accessrole.raw')), Q('bool', must_not=[ Q('nested', path='content', query=Q('bool', must_not=[ Q('term', **{'content.accessrole.raw': 'open_access'}), @@ -674,6 +675,9 @@ def embargoed_access_query(now): Q('nested', path='content', query=Q('term', **{'content.accessrole.raw': 'open_no'})) ], must_not=[ Q('nested', path='content', query=Q('term', **{'content.accessrole.raw': 'open_login'})) + ]), + Q('bool', must_not=[ + Q('nested', path='content', query=Q('exists', field='content.accessrole.raw')) ]) ]) ], From 8464de852cdeaac5d1cd497c37be125945082dab Mon Sep 17 00:00:00 2001 From: "kenji.shiokawa" Date: Thu, 26 Mar 2026 15:44:25 +0900 Subject: [PATCH 11/28] fix __get_accessrights_query --- modules/weko-search-ui/weko_search_ui/query.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/modules/weko-search-ui/weko_search_ui/query.py b/modules/weko-search-ui/weko_search_ui/query.py index 3280bedfc3..2c1c1d3352 100644 --- a/modules/weko-search-ui/weko_search_ui/query.py +++ b/modules/weko-search-ui/weko_search_ui/query.py @@ -614,9 +614,7 @@ def __get_accessrights_query(params): weko_search_fix_accessrights = current_app.config.get( 'WEKO_SEARCH_FIX_ACCESSRIGHTS', False ) - if not weko_search_fix_accessrights: - return None - + accessrights_value = params.get('accessrights') if not accessrights_value: return None @@ -636,6 +634,12 @@ def __get_accessrights_query(params): if not accessrights_list: return None + if not weko_search_fix_accessrights: + if len(accessrights_list) == 1: + return Q('term', accessRights=accessrights_list[0]) + else: + return Q('terms', accessRights=accessrights_list) + now = datetime.now().isoformat() def open_access_query(now): From 33661e927ca0bda515a5d70cb901119da5faa1a4 Mon Sep 17 00:00:00 2001 From: "kenji.shiokawa" Date: Thu, 26 Mar 2026 21:49:11 +0900 Subject: [PATCH 12/28] fix json_loader --- modules/weko-records/weko_records/utils.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/modules/weko-records/weko_records/utils.py b/modules/weko-records/weko_records/utils.py index d7469b5cb8..91a8dcd774 100644 --- a/modules/weko-records/weko_records/utils.py +++ b/modules/weko-records/weko_records/utils.py @@ -280,6 +280,18 @@ def _set_shared_ids(data): dc.update(dict(owner=int(owner_id))) dc.update(dict(owners=[int(owner_id)])) + dc_copy = copy.deepcopy(dc) + update_embargo_rights(dc_copy) + + access_right = None + for v in dc_copy.values(): + if isinstance(v, dict) and v.get("attribute_name") == "アクセス権": + mlt = v.get("attribute_value_mlt", []) + if mlt and isinstance(mlt, list): + access_right = mlt[0].get("subitem_access_right") + break + jrc["accessRights"] = [access_right] if access_right else [] + del ojson, mjson, item return dc, jrc, is_edit From afd25cd19ee86223514f81465a5065f849f42f64 Mon Sep 17 00:00:00 2001 From: "kenji.shiokawa" Date: Fri, 27 Mar 2026 14:37:27 +0900 Subject: [PATCH 13/28] fix access_rights_type_uri --- modules/weko-records/weko_records/utils.py | 60 +++++++++++++++++----- 1 file changed, 48 insertions(+), 12 deletions(-) diff --git a/modules/weko-records/weko_records/utils.py b/modules/weko-records/weko_records/utils.py index 91a8dcd774..ffb6fc6e6a 100644 --- a/modules/weko-records/weko_records/utils.py +++ b/modules/weko-records/weko_records/utils.py @@ -280,17 +280,40 @@ def _set_shared_ids(data): dc.update(dict(owner=int(owner_id))) dc.update(dict(owners=[int(owner_id)])) - dc_copy = copy.deepcopy(dc) - update_embargo_rights(dc_copy) - - access_right = None - for v in dc_copy.values(): - if isinstance(v, dict) and v.get("attribute_name") == "アクセス権": - mlt = v.get("attribute_value_mlt", []) - if mlt and isinstance(mlt, list): - access_right = mlt[0].get("subitem_access_right") - break - jrc["accessRights"] = [access_right] if access_right else [] + if current_app.config.get("WEKO_SEARCH_FIX_ACCESSRIGHTS", False): + update_embargo_rights(jrc["_item_metadata"]) + + from weko_records.serializers.utils import get_mapping + mapping = get_mapping(item_type_id, "jpcoar_mapping") + access_path = mapping.get("accessRights.@value") + def _get_nested_value(data, path): + keys = path.split('.') + key = keys[0] + rest = '.'.join(keys[1:]) + if isinstance(data, dict): + if key in data: + if rest: + return _get_nested_value(data[key], rest) + else: + return data[key] + elif 'attribute_value_mlt' in data: + for item in data['attribute_value_mlt']: + found = _get_nested_value(item, '.'.join(keys)) + if found is not None: + return found + return None + else: + return None + elif isinstance(data, list): + for item in data: + found = _get_nested_value(item, '.'.join(keys)) + if found is not None: + return found + return None + else: + return None + access_right = _get_nested_value(jrc["_item_metadata"], access_path) if access_path else None + jrc["accessRights"] = [access_right] if access_right else [] del ojson, mjson, item return dc, jrc, is_edit @@ -2947,6 +2970,14 @@ def update_embargo_rights(metadata: dict) -> None: if not access_path: return + if access_path.endswith("subitem_access_right"): + access_uri_path = ( + access_path[:-len("subitem_access_right")] + + "subitem_access_right_uri" + ) + else: + access_uri_path = None + def _get_nested_value(data, path): keys = path.split('.') key = keys[0] @@ -3009,6 +3040,9 @@ def _get_nested_value(data, path): access_right_value, today, accessrole_date ) + access_right_type_uri = current_app.config.get("ACCESS_RIGHT_TYPE_URI", "") + access_right_type_uri_value = access_right_type_uri.get(change_value, "") + def _set_nested_value(data, path, value): keys = path.split('.') key = keys[0] @@ -3032,4 +3066,6 @@ def _set_nested_value(data, path, value): _set_nested_value(item, rest, value) if is_update and change_value: - _set_nested_value(metadata, access_path, change_value) \ No newline at end of file + _set_nested_value(metadata, access_path, change_value) + if access_uri_path and access_right_type_uri_value: + _set_nested_value(metadata, access_uri_path, access_right_type_uri_value) From cc75b99433ca5a325242c089f5bac75ad29b64bf Mon Sep 17 00:00:00 2001 From: "kenji.shiokawa" Date: Fri, 27 Mar 2026 15:16:21 +0900 Subject: [PATCH 14/28] fix update_embargo_rights --- modules/weko-records/weko_records/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/weko-records/weko_records/utils.py b/modules/weko-records/weko_records/utils.py index ffb6fc6e6a..92de0aeacd 100644 --- a/modules/weko-records/weko_records/utils.py +++ b/modules/weko-records/weko_records/utils.py @@ -3040,7 +3040,7 @@ def _get_nested_value(data, path): access_right_value, today, accessrole_date ) - access_right_type_uri = current_app.config.get("ACCESS_RIGHT_TYPE_URI", "") + access_right_type_uri = current_app.config.get("ACCESS_RIGHT_TYPE_URI", {}) access_right_type_uri_value = access_right_type_uri.get(change_value, "") def _set_nested_value(data, path, value): From dc261af2c95364eab8d7703c7ac913ba6b672736 Mon Sep 17 00:00:00 2001 From: "kenji.shiokawa" Date: Tue, 31 Mar 2026 11:52:41 +0900 Subject: [PATCH 15/28] fix embargo unit test --- .../invenio_records_rest/facets.py | 5 +- .../invenio-records-rest/tests/test_facets.py | 1 + modules/weko-admin/tests/test_utils.py | 103 +++++++++++---- modules/weko-records/tests/test_utils.py | 125 +++++++++++++++++- modules/weko-records/weko_records/utils.py | 3 +- modules/weko-search-ui/tests/test_query.py | 21 ++- .../weko-search-ui/weko_search_ui/config.py | 2 +- 7 files changed, 215 insertions(+), 45 deletions(-) diff --git a/modules/invenio-records-rest/invenio_records_rest/facets.py b/modules/invenio-records-rest/invenio_records_rest/facets.py index a485e2aa4b..5d30f92e17 100644 --- a/modules/invenio-records-rest/invenio_records_rest/facets.py +++ b/modules/invenio-records-rest/invenio_records_rest/facets.py @@ -94,10 +94,11 @@ def inner(values): def _create_filter_dsl(urlkwargs, definitions): """Create a filter DSL expression.""" filters = [] + weko_search_fix_accessrights = current_app.config.get("WEKO_SEARCH_FIX_ACCESSRIGHTS", False) for name, filter_factory in definitions.items(): values = request.values.getlist(name, type=text_type) if values: - if name in ("Access", "accessRights") and "new_accessRights" in definitions: + if name in ("Access", "accessRights") and "new_accessRights" in definitions and weko_search_fix_accessrights: new_accessrights_filters = definitions["new_accessRights"]["filters"]["filters"] access_rights_queries = [] for v in values: @@ -148,7 +149,7 @@ def default_facets_factory(search, index): all fields and values used. """ urlkwargs = MultiDict() - + from weko_admin.utils import get_facet_search_query from weko_search_ui.permissions import search_permission facets = get_facet_search_query(search_permission.can()).get(index) diff --git a/modules/invenio-records-rest/tests/test_facets.py b/modules/invenio-records-rest/tests/test_facets.py index 3c4a7ef40b..bc2843e33a 100644 --- a/modules/invenio-records-rest/tests/test_facets.py +++ b/modules/invenio-records-rest/tests/test_facets.py @@ -87,6 +87,7 @@ def test_create_filter_dsl(): def test_create_filter_dsl_accessrights(): app = Flask('testapp') + app.config['WEKO_SEARCH_FIX_ACCESSRIGHTS'] = True # Definition of new_accessRights definitions = { "accessRights": lambda values: Q('terms', accessRights=values), diff --git a/modules/weko-admin/tests/test_utils.py b/modules/weko-admin/tests/test_utils.py index fe8c86f7e2..4a88db502e 100755 --- a/modules/weko-admin/tests/test_utils.py +++ b/modules/weko-admin/tests/test_utils.py @@ -2,7 +2,7 @@ import uuid from mock import patch from datetime import datetime, timedelta -from flask import current_app, Markup +from flask import current_app, Markup, Flask from io import StringIO import pytest import json @@ -2171,10 +2171,6 @@ def test_create_facet_search_query(facet_search_settings): assert has_permission == test_has_permission assert no_permission == test_no_permission -from flask import Flask, current_app -from datetime import datetime -from weko_admin.utils import create_facet_search_query - class DummyFacet: def __init__(self, name_en, mapping, aggregations): self.name_en = name_en @@ -2192,17 +2188,31 @@ def test_create_aggregations_branch(mocker): "bool": { "must": [ {"term": {"accessRights": "embargoed access"}}, + {"nested": {"path": "content", "query": {"exists": {"field": "content.accessrole.raw"}}}}, { - "nested": { - "path": "content", - "query": { - "bool": { - "must": [ - {"term": {"content.accessrole.raw": "open_date"}}, - {"range": {"content.date.dateValue.raw": {"lte": "@date"}}} - ] + "bool": { + "must_not": [ + { + "nested": { + "path": "content", + "query": { + "bool": { + "must_not": [ + {"term": {"content.accessrole.raw": "open_access"}}, + { + "bool": { + "must": [ + {"term": {"content.accessrole.raw": "open_date"}}, + {"range": {"content.date.dateValue.raw": {"lte": "@date"}}} + ] + } + } + ] + } + } + } } - } + ] } } ] @@ -2216,12 +2226,12 @@ def test_create_aggregations_branch(mocker): "must": [ {"term": {"accessRights": "embargoed access"}}, { - "nested": { - "path": "content", - "query": { - "bool": { - "should": [ - { + "bool": { + "should": [ + { + "nested": { + "path": "content", + "query": { "bool": { "must": [ {"term": {"content.accessrole.raw": "open_date"}}, @@ -2229,9 +2239,35 @@ def test_create_aggregations_branch(mocker): ] } } - ] + } + }, + { + "bool": { + "must": [ + {"nested": { + "path": "content", + "query": {"term": {"content.accessrole.raw": "open_no"}} + }} + ], + "must_not": [ + {"nested": { + "path": "content", + "query": {"term": {"content.accessrole.raw": "open_login"}} + }} + ] + } + }, + { + "bool": { + "must_not": [ + {"nested": { + "path": "content", + "query": {"exists": {"field": "content.accessrole.raw"}} + }} + ] + } } - } + ] } } ], @@ -2239,9 +2275,7 @@ def test_create_aggregations_branch(mocker): { "nested": { "path": "content", - "query": { - "term": {"content.accessrole.raw": "open_restricted"} - } + "query": {"term": {"content.accessrole.raw": "open_restricted"}} } } ] @@ -2284,6 +2318,21 @@ def test_create_aggregations_branch(mocker): } ] } + }, + { + "bool": { + "must": [ + {"term": {"accessRights": "embargoed access"}}, + { + "nested": { + "path": "content", + "query": { + "term": {"content.accessrole.raw": "open_restricted"} + } + } + } + ] + } } ] } @@ -2304,9 +2353,6 @@ def test_create_aggregations_branch(mocker): with app.app_context(): has_permission, no_permission = create_facet_search_query() aggs = has_permission["testidx"]["aggs"] - print("--- new_accessRights aggregation ---") - import pprint - pprint.pprint(aggs.get("new_accessRights")) assert "new_accessRights" not in aggs # 2. ACCESSRIGHTS_FIX_ENABLED=True, no accessRights facet @@ -2340,7 +2386,6 @@ def test_create_aggregations_branch(mocker): assert access_type in filters assert "bool" in filters[access_type] import copy - from datetime import datetime for access_type in app.config["WEKO_ACCESS_RIGHTS_CHOICES"]: template = copy.deepcopy(ACCESS_RIGHTS_QUERY_TEMPLATE[access_type]) actual_bool = filters[access_type]["bool"] diff --git a/modules/weko-records/tests/test_utils.py b/modules/weko-records/tests/test_utils.py index ba43dee0fb..b781302290 100644 --- a/modules/weko-records/tests/test_utils.py +++ b/modules/weko-records/tests/test_utils.py @@ -7,6 +7,7 @@ import copy from mock import patch, MagicMock from tests.helpers import json_data +import types from invenio_accounts import testutils from weko_admin.models import AdminSettings @@ -838,6 +839,111 @@ def dumps(self): assert jrc == {'weko_creator_id': '1', 'item_5': ['item_5'], 'item_6': ['item_6_1_v'], 'item_3': ['item_3_1_v'], 'item_4': ['item_4_1_v'], 'creator1': {'nameIdentifier': ['1234', '5678']}, 'control_number': '1', '_oai': {'id': '1'}, '_item_metadata': OrderedDict([('item_1', {'attribute_name': 'item_1', 'attribute_value': 'item_1_v'}), ('item_2', {'attribute_name': 'item_2', 'attribute_value': 'item_2_v'}), ('item_3', {'attribute_name': 'item_3', 'attribute_type': 'creator', 'attribute_value_mlt': [{'item_3_1': 'item_3_1_v'}]}), ('item_4', {'attribute_name': 'item_4', 'attribute_value_mlt': [{'item_4_1': 'item_4_1_v'}]}), ('item_5', {'attribute_name': 'item_5', 'attribute_type': 'file', 'attribute_value_mlt': [{'filename': 'item_5'}]}), ('item_6', {'attribute_name': 'item_6', 'attribute_value': ['item_6_1', 'item_6_1_v']}), ('item_7', {'attribute_name': 'item_7', 'attribute_value_mlt': [{}, {'nameIdentifiers': [{'nameIdentifierScheme': 'WEKO', 'nameIdentifier': '1234'}]}]}), ('item_8', {'attribute_name': 'item_8', 'attribute_value_mlt': [{'nameIdentifiers': [{'nameIdentifierScheme': 'WEKO', 'nameIdentifier': '5678'}]}]}), ('item_title', 'test_item2'), ('item_type_id', '3'), ('control_number', '1'), ('author_link', ['1234', '5678']), ('weko_shared_ids', 2), ('_oai', {'id': '1'}), ('owner', 1), ('owners', [1])]), 'itemtype': 'test10', 'publish_date': None, 'author_link': ['1234', '5678'], 'weko_shared_ids': 2} assert is_edit == False +# .tox/c1/bin/pytest --cov=weko_records tests/test_utils.py::test_json_loader_access_rights -v -s -vv --cov-branch --cov-report=html --cov-config=tox.ini --basetemp=/code/modules/weko-records/.tox/c1/tmp +def test_json_loader_access_rights(app, db, monkeypatch): + app.config["WEKO_SEARCH_FIX_ACCESSRIGHTS"] = True + + minimal_item_data = { + "$schema": "test_schema/1", + "item_type_id": "test", + "item_1": { + "attribute_name": "アクセス権", + "attribute_value_mlt": [ + { + "subitem_access_right": "open access", + "subitem_access_right_uri": "http://purl.org/coar/access_right/c_f1cf" + } + ] + }, + } + minimal_item_data2 = { + "$schema": "test_schema/1", + "item_type_id": "test", + "item_1": { + "attribute_name": "アクセス権", + "attribute_value_mlt": [{}] + }, + } + minimal_item_data3 = { + "$schema": "test_schema/1", + "item_type_id": "test", + "item_1": { + "attribute_name": "アクセス権", + "attribute_value_mlt": "test_data" + }, + } + class DummyPid: + object_uuid = "dummy-uuid" + pid_value = "dummy-pid" + minimal_pid = DummyPid() + + # ItemTypes, Mapping, SchemaTree, PersistentIdentifier, current_pidstore などをモック + from collections import UserDict + class DummyOjson(UserDict): + pass + + dummy_ojson = DummyOjson() + dummy_ojson["properties"] = { + "item_1": { + "type": "array", + "items": {"properties": {}} + } + } + dummy_ojson.model = types.SimpleNamespace() + dummy_ojson.model.item_type_name = types.SimpleNamespace() + dummy_ojson.model.item_type_name.name = "dummy_type" + dummy_mjson = types.SimpleNamespace() + dummy_mjson.dumps = lambda: {"item_1": {}} + dummy_schema_tree = types.SimpleNamespace() + dummy_schema_tree.get_jpcoar_json = lambda jpcoar, replace_field=True: { + "item_1": { + "attribute_value_mlt": [ + {"subitem_access_right": "open access"} + ] + } + } + monkeypatch.setattr("weko_records.utils.ItemTypes", types.SimpleNamespace(get_record=lambda *a, **k: dummy_ojson)) + monkeypatch.setattr("weko_records.utils.Mapping", types.SimpleNamespace(get_record=lambda *a, **k: dummy_mjson)) + monkeypatch.setattr("weko_records.utils.SchemaTree", types.SimpleNamespace(get_jpcoar_json=dummy_schema_tree.get_jpcoar_json)) + monkeypatch.setattr("weko_records.utils.PersistentIdentifier", types.SimpleNamespace(get_by_object=lambda *a, **k: types.SimpleNamespace(pid_value="oaiid"), get=lambda *a, **k: types.SimpleNamespace(object_uuid="dummy-uuid"))) + monkeypatch.setattr("weko_records.utils.current_pidstore", types.SimpleNamespace(minters={"oaiid": lambda *a, **k: types.SimpleNamespace(pid_value="oaiid")})) + monkeypatch.setattr("weko_records.utils.current_user", types.SimpleNamespace(get_id=lambda: "1")) + monkeypatch.setattr("weko_records.utils.COPY_NEW_FIELD", False) + monkeypatch.setattr("weko_records.utils.sm", types.SimpleNamespace(get=lambda: None)) + monkeypatch.setattr("weko_records.utils.ad_config", types.SimpleNamespace(WEKO_ADMIN_MANAGEMENT_OPTIONS={"detail_condition": []})) + + access_path = "item_1.attribute_value_mlt.subitem_access_right" + with app.app_context(): + # When access_path is set (accessRights should be found) + monkeypatch.setattr("weko_records.serializers.utils.get_mapping", lambda i, t: {"accessRights.@value": access_path}) + monkeypatch.setattr("weko_records.utils.update_embargo_rights", lambda meta: None) + from weko_records.utils import json_loader + dc, jrc, is_edit = json_loader(minimal_item_data, minimal_pid) + assert "accessRights" in jrc + assert "open access" in jrc["accessRights"] + + # When access_path is not set (accessRights should not exist) + monkeypatch.setattr("weko_records.serializers.utils.get_mapping", lambda i, t: {}) + dc, jrc, is_edit = json_loader(minimal_item_data, minimal_pid) + assert "accessRights" not in jrc + + # When config is False (accessRights should not exist) + app.config["WEKO_SEARCH_FIX_ACCESSRIGHTS"] = False + monkeypatch.setattr("weko_records.serializers.utils.get_mapping", lambda i, t: {"accessRights.@value": access_path}) + dc, jrc, is_edit = json_loader(minimal_item_data, minimal_pid) + assert "accessRights" not in jrc + + # When access_path is set but value cannot be found (accessRights should not exist) + app.config["WEKO_SEARCH_FIX_ACCESSRIGHTS"] = True + monkeypatch.setattr("weko_records.serializers.utils.get_mapping", lambda i, t: {"accessRights.@value": access_path}) + dc, jrc, is_edit = json_loader(minimal_item_data2, minimal_pid) + assert "accessRights" not in jrc + + # When access_path is set but value cannot be found (accessRights should not exist) + monkeypatch.setattr("weko_records.serializers.utils.get_mapping", lambda i, t: {"accessRights.@value": access_path}) + dc, jrc, is_edit = json_loader(minimal_item_data3, minimal_pid) + assert "accessRights" not in jrc + # def get_author_link(author_link, value) # .tox/c1/bin/pytest --cov=weko_records tests/test_utils.py::test_get_author_link -v -s -vv --cov-branch --cov-report=term --cov-report=html --cov-config=tox.ini --basetemp=/code/modules/weko-records/.tox/c1/tmp @@ -2751,23 +2857,39 @@ def test_update_embargo_rights(app, monkeypatch): cfg = True meta = copy.deepcopy(base_meta) access_path = "item_1736146823660.attribute_value_mlt.subitem_access_right" - monkeypatch.setattr(utils, "current_app", MagicMock(config={"WEKO_SEARCH_FIX_ACCESSRIGHTS": cfg})) + access_right_type_uri = { + "embargoed access": "http://purl.org/coar/access_right/c_f1cf", + "metadata only access": "http://purl.org/coar/access_right/c_14cb", + "open access": "http://purl.org/coar/access_right/c_abf2", + "restricted access": "http://purl.org/coar/access_right/c_16ec", + } + monkeypatch.setattr( + utils, + "current_app", + MagicMock(config={ + "WEKO_SEARCH_FIX_ACCESSRIGHTS": cfg, + "ACCESS_RIGHT_TYPE_URI": access_right_type_uri, + }) + ) monkeypatch.setattr("weko_records.serializers.utils.get_mapping", lambda i, t: {"accessRights.@value": access_path}) monkeypatch.setattr(utils, "check_embargo_rights", lambda a, t, d: (True, "open access")) utils.update_embargo_rights(meta) assert meta["item_1736146823660"]["attribute_value_mlt"][0]["subitem_access_right"] == "open access" + assert meta["item_1736146823660"]["attribute_value_mlt"][0]["subitem_access_right_uri"] == "http://purl.org/coar/access_right/c_abf2" # Embargo is released and access becomes restricted_access meta = copy.deepcopy(base_meta) monkeypatch.setattr(utils, "check_embargo_rights", lambda a, t, d: (True, "restricted access")) utils.update_embargo_rights(meta) assert meta["item_1736146823660"]["attribute_value_mlt"][0]["subitem_access_right"] == "restricted access" + assert meta["item_1736146823660"]["attribute_value_mlt"][0]["subitem_access_right_uri"] == "http://purl.org/coar/access_right/c_16ec" # Embargo is not released (returns None), access remains embargoed meta = copy.deepcopy(base_meta) monkeypatch.setattr(utils, "check_embargo_rights", lambda a, t, d: (False, None)) utils.update_embargo_rights(meta) assert meta["item_1736146823660"]["attribute_value_mlt"][0]["subitem_access_right"] == "embargoed access" + assert meta["item_1736146823660"]["attribute_value_mlt"][0]["subitem_access_right_uri"] == "http://purl.org/coar/access_right/c_f1cf" # File information with accessrole=None is handled correctly meta = copy.deepcopy(base_meta) @@ -2775,6 +2897,7 @@ def test_update_embargo_rights(app, monkeypatch): monkeypatch.setattr(utils, "check_embargo_rights", lambda a, t, d: (False, None)) utils.update_embargo_rights(meta) assert meta["item_1736146823660"]["attribute_value_mlt"][0]["subitem_access_right"] == "embargoed access" + assert meta["item_1736146823660"]["attribute_value_mlt"][0]["subitem_access_right_uri"] == "http://purl.org/coar/access_right/c_f1cf" # Access rights information with None is handled correctly meta = copy.deepcopy(base_meta) diff --git a/modules/weko-records/weko_records/utils.py b/modules/weko-records/weko_records/utils.py index 92de0aeacd..47493cc89b 100644 --- a/modules/weko-records/weko_records/utils.py +++ b/modules/weko-records/weko_records/utils.py @@ -313,7 +313,8 @@ def _get_nested_value(data, path): else: return None access_right = _get_nested_value(jrc["_item_metadata"], access_path) if access_path else None - jrc["accessRights"] = [access_right] if access_right else [] + if access_right: + jrc["accessRights"] = [access_right] del ojson, mjson, item return dc, jrc, is_edit diff --git a/modules/weko-search-ui/tests/test_query.py b/modules/weko-search-ui/tests/test_query.py index 1ecf8a343a..b71de99d7f 100644 --- a/modules/weko-search-ui/tests/test_query.py +++ b/modules/weko-search-ui/tests/test_query.py @@ -952,18 +952,18 @@ def test_split_text_by_or(): assert _split_text_by_or("AAA OR OR BBB") == ["AAA", "", "BBB"] assert _split_text_by_or("OR AAA |") == ["OR AAA |"] -@pytest.mark.parametrize("fix_accessrights, accessrights, expected_should, expected_key", [ - (True, "", False, None), # empty - (True, "open access", False, "open access"), # single value - (True, "embargoed access", False, "embargoed access"), - (True, "restricted access", False, "restricted access"), - (True, "metadata only access", False, "metadata only access"), - (True, "open access,embargoed access", True, "open access"), # multiple values - (True, "invalid access", False, None), # invalid value - (False, "open access", False, None), # config disabled +@pytest.mark.parametrize("fix_accessrights, accessrights, expected_key", [ + (True, "", None), # empty + (True, "open access", "open access"), # single value + (True, "embargoed access", "embargoed access"), + (True, "restricted access", "restricted access"), + (True, "metadata only access", "metadata only access"), + (True, "open access,embargoed access", "open access"), # multiple values + (True, "invalid access", None), # invalid value + (False, "open access", "open access"), # config disabled ]) # .tox/c1/bin/pytest --cov=weko_search_ui tests/test_query.py::test_accessrights_query_param -vv -s --cov-branch --cov-report=xml --basetemp=/code/modules/weko-search-ui/.tox/c1/tmp -def test_accessrights_query_param(app, users, fix_accessrights, accessrights, expected_should, expected_key, mocker): +def test_accessrights_query_param(app, users, fix_accessrights, accessrights, expected_key, mocker): ACCESS_RIGHTS_CHOICES = [ 'embargoed access', 'metadata only access', @@ -990,7 +990,6 @@ def test_accessrights_query_param(app, users, fix_accessrights, accessrights, ex mocker.patch("weko_search_ui.permissions.search_permission",side_effect=MockSearchPerm) app.extensions['invenio-oauth2server'] = 1 app.extensions['invenio-queues'] = 1 - mock_searchperm = MagicMock(side_effect=MockSearchPerm) search_result, urlkwargs = default_search_factory(self=None, search=search) query = search_result.query().to_dict() must_result = query["query"]["bool"]["filter"][0]["bool"]["must"] diff --git a/modules/weko-search-ui/weko_search_ui/config.py b/modules/weko-search-ui/weko_search_ui/config.py index b969f19bf9..d200c02707 100644 --- a/modules/weko-search-ui/weko_search_ui/config.py +++ b/modules/weko-search-ui/weko_search_ui/config.py @@ -783,5 +783,5 @@ WEKO_ACCESS_RIGHTS_CHOICES = [ "embargoed access", "metadata only access", "open access", "restricted access"] -WEKO_SEARCH_FIX_ACCESSRIGHTS = True +WEKO_SEARCH_FIX_ACCESSRIGHTS = False """ If true, the value of accessrights will be modified. """ From a5c0f46e22b92b705d916c7432c2da593555b7bf Mon Sep 17 00:00:00 2001 From: "kenji.shiokawa" Date: Tue, 7 Apr 2026 15:48:15 +0900 Subject: [PATCH 16/28] fix Embargo Updated --- .../invenio_oaiserver/query.py | 22 ++++-- .../invenio-records/invenio_records/api.py | 76 +++++++++++++++++++ .../invenio_resourcesyncserver/query.py | 43 +++++++---- .../weko-search-ui/weko_search_ui/query.py | 76 +++++++++++++++++++ 4 files changed, 195 insertions(+), 22 deletions(-) diff --git a/modules/invenio-oaiserver/invenio_oaiserver/query.py b/modules/invenio-oaiserver/invenio_oaiserver/query.py index 9c492bca1d..a66afe4e44 100644 --- a/modules/invenio-oaiserver/invenio_oaiserver/query.py +++ b/modules/invenio-oaiserver/invenio_oaiserver/query.py @@ -20,6 +20,7 @@ from weko_index_tree.models import Index from weko_schema_ui.models import PublishStatus from werkzeug.utils import cached_property, import_string +from weko_search_ui.query import range_query from . import current_oaiserver @@ -194,13 +195,20 @@ def get_descendant_ids(index_id): index_ids = [sets] + get_descendant_ids(sets) search = search.query('terms', **{'_oai.sets': index_ids}) - time_range = {} - if 'from_' in kwargs: - time_range['gte'] = kwargs['from_'] - if 'until' in kwargs: - time_range['lte'] = kwargs['until'] - if time_range: - search = search.filter('range', **{'_updated': time_range}) + if not current_app.config.get('WEKO_SEARCH_FIX_ACCESSRIGHTS', False): + time_range = {} + if 'from_' in kwargs: + time_range['gte'] = kwargs['from_'] + if 'until' in kwargs: + time_range['lte'] = kwargs['until'] + if time_range: + search = search.filter('range', **{'_updated': time_range}) + else: + if 'from_' in kwargs or 'until' in kwargs: + now = datetime.now().isoformat() + rq = range_query(now, kwargs.get('from_'), kwargs.get('until')) + if rq is not None: + search = search.filter(rq) search = search.query('match', **{'relation_version_is_last': 'true'}) search = search.query('terms', **{'publish_status': [ diff --git a/modules/invenio-records/invenio_records/api.py b/modules/invenio-records/invenio_records/api.py index 4d3419cc17..5b1f979b56 100644 --- a/modules/invenio-records/invenio_records/api.py +++ b/modules/invenio-records/invenio_records/api.py @@ -17,6 +17,7 @@ from jsonpatch import apply_patch from sqlalchemy.orm.attributes import flag_modified from werkzeug.local import LocalProxy +from datetime import datetime, time, date from .errors import MissingModelError from .models import RecordMetadata @@ -377,6 +378,81 @@ def revisions(self): return RevisionsIterator(self.model) + @property + def updated(self): + original_updated = self.model.updated + if not current_app.config.get('WEKO_SEARCH_FIX_ACCESSRIGHTS', False): + return original_updated + db.session.expire_all() + metadata = self.model.json if self.model else None + item_type_id = metadata.get("item_type_id") if metadata else None + from weko_records.serializers.utils import get_mapping + mapping = get_mapping(item_type_id, "jpcoar_mapping") if item_type_id else None + access_path = mapping.get("accessRights.@value") if mapping else None + + def _get_nested_value(data, path): + keys = path.split('.') + key = keys[0] + rest = '.'.join(keys[1:]) + if isinstance(data, dict): + if key in data: + if rest: + return _get_nested_value(data[key], rest) + else: + return data[key] + elif 'attribute_value_mlt' in data: + for item in data['attribute_value_mlt']: + found = _get_nested_value(item, '.'.join(keys)) + if found is not None: + return found + return None + else: + return None + elif isinstance(data, list): + for item in data: + found = _get_nested_value(item, '.'.join(keys)) + if found is not None: + return found + return None + else: + return None + + access_rights = _get_nested_value(metadata, access_path) if metadata and access_path else None + accessrole_date = [] + for v in metadata.values() if metadata else []: + if isinstance(v, dict) and v.get("attribute_type") == "file": + for file_info in v.get("attribute_value_mlt", []): + date_list = file_info.get("date", []) + date_value = date_list[0].get("dateValue") if date_list else None + accessrole = file_info.get("accessrole") + if date_value and accessrole: + date_obj = datetime.strptime(date_value, "%Y-%m-%d").date() + accessrole_date.append((accessrole, date_obj)) + + today = datetime.now().date() + + from weko_records.utils import check_embargo_rights + is_update, change_value = check_embargo_rights( + access_rights, today, accessrole_date + ) + + if not (is_update and change_value == "open access"): + return original_updated + + open_dates = [] + for role, date_str in accessrole_date: + dt = datetime.combine(date_str, time(0, 0, 0)) + dt = datetime.combine(dt.date(), time(0, 0, 0)) + open_dates.append(dt) + + if not open_dates: + return original_updated + + latest_open_date = max(open_dates) + if original_updated: + return max(original_updated, latest_open_date) + else: + return latest_open_date class RecordRevision(RecordBase): """API for record revisions.""" diff --git a/modules/invenio-resourcesyncserver/invenio_resourcesyncserver/query.py b/modules/invenio-resourcesyncserver/invenio_resourcesyncserver/query.py index f2d381b557..ff58ba1196 100644 --- a/modules/invenio-resourcesyncserver/invenio_resourcesyncserver/query.py +++ b/modules/invenio-resourcesyncserver/invenio_resourcesyncserver/query.py @@ -28,7 +28,9 @@ from weko_index_tree.api import Indexes from weko_schema_ui.models import PublishStatus from weko_search_ui.utils import execute_search_with_pagination +from weko_search_ui.query import range_query +from datetime import datetime from .config import WEKO_ROOT_INDEX @@ -262,14 +264,20 @@ def _get_index_search_query(_date_from: str, _date_until: str) -> dict: "path": list_path } } - post_filter['bool']['must'].append({ - "range": { - "_updated": { - "lte": _date_until, - "gte": _date_from + if not current_app.config.get('WEKO_SEARCH_FIX_ACCESSRIGHTS', False): + post_filter['bool']['must'].append({ + "range": { + "_updated": { + "lte": _date_until, + "gte": _date_from + } } - } - }) + }) + else: + now = datetime.now().isoformat() + rq = range_query(now, _date_from, _date_until) + if rq is not None: + post_filter['bool']['must'].append(rq.to_dict()) # create search query try: query_q = json.dumps(query_q).replace("@index", q) @@ -285,14 +293,20 @@ def _get_index_search_query(_date_from: str, _date_until: str) -> dict: "path": list_path } }) - post_filter['bool']['must'].append({ - "range": { - "_updated": { - "lte": _date_until, - "gte": _date_from + if not current_app.config.get('WEKO_SEARCH_FIX_ACCESSRIGHTS', False): + post_filter['bool']['must'].append({ + "range": { + "_updated": { + "lte": _date_until, + "gte": _date_from + } } - } - }) + }) + else: + now = datetime.now().isoformat() + rq = range_query(now, _date_from, _date_until) + if rq is not None: + post_filter['bool']['must'].append(rq.to_dict()) # create search query wild_card = [] child_list = Indexes.get_child_list(q) @@ -320,7 +334,6 @@ def _get_index_search_query(_date_from: str, _date_until: str) -> dict: # create a index search query query_q = _get_index_search_query(date_from, date_until) - try: search.update_from_dict(query_q) except SyntaxError: diff --git a/modules/weko-search-ui/weko_search_ui/query.py b/modules/weko-search-ui/weko_search_ui/query.py index 2c1c1d3352..b4cedb244c 100644 --- a/modules/weko-search-ui/weko_search_ui/query.py +++ b/modules/weko-search-ui/weko_search_ui/query.py @@ -1519,3 +1519,79 @@ def _split_text_by_or(text): split_text_list = re.split(pattern, text) split_text_list = [item.strip() for item in split_text_list] return split_text_list + +def range_query(now, _from=None, _until=None): + """ + Generate a search query considering update date changes. + + :param now: Current time (str) + :param _from: Lower bound of update date (str or None) + :param _until: Upper bound of update date (str or None) + :return: elasticsearch_dsl.query.Q or None + """ + if _from is None and _until is None: + return None + + # First should condition + must_not_embargoed = Q('bool', must_not=[Q('term', accessRights='embargoed access')]) + must_not_content_accessrole = Q('bool', must_not=[Q('nested', path='content', query=Q('exists', field='content.accessrole.raw'))]) + must_not_open_access = Q('nested', path='content', query=Q('bool', must_not=[ + Q('term', **{'content.accessrole.raw': 'open_access'}), + Q('bool', must=[ + Q('term', **{'content.accessrole.raw': 'open_date'}), + Q('range', **{'content.date.dateValue.raw': {'lte': now}}) + ]) + ])) + should1 = Q('bool', must=[ + Q('bool', should=[must_not_embargoed, must_not_content_accessrole, must_not_open_access]), + Q('range', **{ + '_updated': { + **({'gte': _from} if _from else {}), + **({'lte': _until} if _until else {}) + } + }) + ]) + + # Second should condition + must_not_open_access2 = Q('nested', path='content', query=Q('bool', must_not=[ + Q('term', **{'content.accessrole.raw': 'open_access'}), + Q('bool', must=[ + Q('term', **{'content.accessrole.raw': 'open_date'}), + Q('range', **{'content.date.dateValue.raw': {'lte': now}}) + ]) + ])) + + # from condition + from_should = [] + if _from: + from_should.append(Q('nested', path='content', query=Q('bool', must=[ + Q('term', **{'content.accessrole.raw': 'open_date'}), + Q('range', **{'content.date.dateValue.raw': {'gte': _from}}) + ]))) + from_should.append(Q('range', **{'_updated': {'gte': _from}})) + + # until condition + until_must = [] + if _until: + until_must.append(Q('bool', must_not=[ + Q('nested', path='content', query=Q('bool', must=[ + Q('term', **{'content.accessrole.raw': 'open_date'}), + Q('range', **{'content.date.dateValue.raw': {'gt': _until}}) + ])) + ])) + until_must.append(Q('range', **{'_updated': {'lte': _until}})) + + must2 = [ + Q('term', accessRights='embargoed access'), + Q('nested', path='content', query=Q('exists', field='content.accessrole.raw')), + Q('bool', must_not=[must_not_open_access2]) + ] + if from_should: + must2.append(Q('bool', should=from_should)) + if until_must: + must2.extend(until_must) + + should2 = Q('bool', must=must2) + + # Overall should + return Q('bool', should=[should1, should2]) \ No newline at end of file From 9005267874bf6382732feb2d102dace385b2b57d Mon Sep 17 00:00:00 2001 From: "kenji.shiokawa" Date: Thu, 9 Apr 2026 14:19:14 +0900 Subject: [PATCH 17/28] Fixes for issues identified in unit tests --- modules/weko-admin/tests/test_utils.py | 71 ++++++++++++++++++++++ modules/weko-search-ui/tests/test_query.py | 2 - 2 files changed, 71 insertions(+), 2 deletions(-) diff --git a/modules/weko-admin/tests/test_utils.py b/modules/weko-admin/tests/test_utils.py index 4a88db502e..8a8abf686b 100755 --- a/modules/weko-admin/tests/test_utils.py +++ b/modules/weko-admin/tests/test_utils.py @@ -2497,6 +2497,77 @@ def test_get_facet_search_query(app,mocker): # .tox/c1/bin/pytest --cov=weko_admin tests/test_utils.py::test_get_title_facets -vv -s --cov-branch --cov-report=term --basetemp=/code/modules/weko-admin/.tox/c1/tmp def test_get_title_facets(app,facet_search_settings): with app.test_request_context(headers=[('Accept-Language', 'en')]): + titles, order = get_title_facets() + assert titles == {"Data Language":"Data Language","Data Type":"Data Type","raw_test":"raw_test"} + assert order == {1:"Data Language",3:"Data Type",4:"raw_test"} + + +# def is_exits_facet(data, id): +# .tox/c1/bin/pytest --cov=weko_admin tests/test_utils.py::test_is_exits_facet -vv -s --cov-branch --cov-report=term --basetemp=/code/modules/weko-admin/.tox/c1/tmp +def test_is_exits_facet(app, facet_search_settings): + with app.test_request_context(headers=[('Accept-Language', 'en')]): + # not id > 0 + result = is_exits_facet({"name_en":"Data Type","name_jp":"データタイプ","mapping":"description.value"},None) + assert result == True + result = is_exits_facet({"name_en":"not exist facet","name_jp":"存在しないファセット","mapping":"not exist mapping"},None) + assert result == False + + # id > 0 + result = is_exits_facet({"name_en":"Data Type","name_jp":"データタイプ","mapping":"description.value"},"3") + assert result == False + result = is_exits_facet({"name_en":"Data Type","name_jp":"データタイプ","mapping":"description.value"},"100") + assert result == True + +# def overwrite_the_memory_config_with_db(app, site_info): +# .tox/c1/bin/pytest --cov=weko_admin tests/test_utils.py::test_overwrite_the_memory_config_with_db -vv -s --cov-branch --cov-report=term --basetemp=/code/modules/weko-admin/.tox/c1/tmp +def test_overwrite_the_memory_config_with_db(app,client,site_info): + from flask import Flask + + site_info_not_google = SiteInfo( + site_name=[{"name":"test_site_info"}], + notify={"name":"test_notify"} + ) + + site_info_google1 = SiteInfo( + site_name=[{"name":"test_site_info"}], + notify={"name":"test_notify"}, + google_tracking_id_user="test_tracking_id1", + ) + site_info_google2 = SiteInfo( + site_name=[{"name":"test_site_info"}], + notify={"name":"test_notify"}, + google_tracking_id_user="test_tracking_id2", + ) + + app = Flask("test_weko_admin_app") + # site_info is None + overwrite_the_memory_config_with_db(app, None) + + # site_info.google_tracking_id_user is not exist + overwrite_the_memory_config_with_db(app, site_info_not_google) + + # GOOGLE_TRACKING_ID_USER is not exist + overwrite_the_memory_config_with_db(app, site_info_google1) + assert app.config["GOOGLE_TRACKING_ID_USER"] == "test_tracking_id1" + + overwrite_the_memory_config_with_db(app, site_info_google2) + assert app.config["GOOGLE_TRACKING_ID_USER"] == "test_tracking_id2" + +import json +import pytest +from flask import current_app, make_response, request, url_for +from flask_login import current_user +from mock import patch + +from weko_admin.utils import ( + get_title_facets +) + +# def get_title_facets(): +def test_get_title_facets(app, users, facet_search_settings): + #facet_search_setting = json_data("data/test_facet.json") + with app.test_request_context(headers=[('Accept-Language', 'en')]): + #with patch("weko_admin.models.FacetSearchSetting.get_activated_facets", return_value=facet_search_setting): titles, order, uiTypes, isOpens, displayNumbers, searchConditions = get_title_facets() assert uiTypes assert isOpens diff --git a/modules/weko-search-ui/tests/test_query.py b/modules/weko-search-ui/tests/test_query.py index b71de99d7f..431d1b4da4 100644 --- a/modules/weko-search-ui/tests/test_query.py +++ b/modules/weko-search-ui/tests/test_query.py @@ -2,8 +2,6 @@ import copy import pytest -from datetime import datetime, timezone -from flask import current_app, request from elasticsearch_dsl.query import Match, Range, Terms, Bool from mock import patch, MagicMock from werkzeug.datastructures import MultiDict From 3a8f5a52d901174d9cffbc1ac99fd9c20e2bd28f Mon Sep 17 00:00:00 2001 From: "kenji.shiokawa" Date: Thu, 9 Apr 2026 14:53:12 +0900 Subject: [PATCH 18/28] fix updated date: Correction of Issues Pointed Out --- .../invenio_oaiserver/query.py | 18 +++++++-------- .../invenio-records/invenio_records/api.py | 7 ++++-- .../invenio_resourcesyncserver/query.py | 22 +++++++++---------- .../weko-search-ui/weko_search_ui/query.py | 20 +++++++++-------- 4 files changed, 36 insertions(+), 31 deletions(-) diff --git a/modules/invenio-oaiserver/invenio_oaiserver/query.py b/modules/invenio-oaiserver/invenio_oaiserver/query.py index a66afe4e44..7aaab5d2ab 100644 --- a/modules/invenio-oaiserver/invenio_oaiserver/query.py +++ b/modules/invenio-oaiserver/invenio_oaiserver/query.py @@ -153,7 +153,7 @@ def get_descendant_ids(index_id): descendant_ids = db.session.query(cte.c.descendant_id).all() return [descendant_id[0] for descendant_id in descendant_ids] - + page_ = kwargs.get('resumptionToken', {}).get('page', 1) size_ = current_app.config['OAISERVER_PAGE_SIZE'] @@ -188,14 +188,20 @@ def get_descendant_ids(index_id): #search = search.query('match', **{'path': kwargs['set']}) #search = search.query('match', **{'_oai.sets': sets}) #search = search.query('terms', **{'_oai.sets': sets}) - + if not sets: search = search.query('match_none') else: index_ids = [sets] + get_descendant_ids(sets) search = search.query('terms', **{'_oai.sets': index_ids}) - if not current_app.config.get('WEKO_SEARCH_FIX_ACCESSRIGHTS', False): + if current_app.config.get('WEKO_SEARCH_FIX_ACCESSRIGHTS', False): + if 'from_' in kwargs or 'until' in kwargs: + now = datetime.now().isoformat() + rq = range_query(now, kwargs.get('from_'), kwargs.get('until')) + if rq is not None: + search = search.filter(rq) + else: time_range = {} if 'from_' in kwargs: time_range['gte'] = kwargs['from_'] @@ -203,12 +209,6 @@ def get_descendant_ids(index_id): time_range['lte'] = kwargs['until'] if time_range: search = search.filter('range', **{'_updated': time_range}) - else: - if 'from_' in kwargs or 'until' in kwargs: - now = datetime.now().isoformat() - rq = range_query(now, kwargs.get('from_'), kwargs.get('until')) - if rq is not None: - search = search.filter(rq) search = search.query('match', **{'relation_version_is_last': 'true'}) search = search.query('terms', **{'publish_status': [ diff --git a/modules/invenio-records/invenio_records/api.py b/modules/invenio-records/invenio_records/api.py index 5b1f979b56..64e7f644b6 100644 --- a/modules/invenio-records/invenio_records/api.py +++ b/modules/invenio-records/invenio_records/api.py @@ -17,7 +17,7 @@ from jsonpatch import apply_patch from sqlalchemy.orm.attributes import flag_modified from werkzeug.local import LocalProxy -from datetime import datetime, time, date +from datetime import datetime, time from .errors import MissingModelError from .models import RecordMetadata @@ -417,7 +417,10 @@ def _get_nested_value(data, path): else: return None - access_rights = _get_nested_value(metadata, access_path) if metadata and access_path else None + access_rights = ( + _get_nested_value(metadata, access_path) + if metadata and access_path else None + ) accessrole_date = [] for v in metadata.values() if metadata else []: if isinstance(v, dict) and v.get("attribute_type") == "file": diff --git a/modules/invenio-resourcesyncserver/invenio_resourcesyncserver/query.py b/modules/invenio-resourcesyncserver/invenio_resourcesyncserver/query.py index ff58ba1196..96baff8d59 100644 --- a/modules/invenio-resourcesyncserver/invenio_resourcesyncserver/query.py +++ b/modules/invenio-resourcesyncserver/invenio_resourcesyncserver/query.py @@ -264,7 +264,12 @@ def _get_index_search_query(_date_from: str, _date_until: str) -> dict: "path": list_path } } - if not current_app.config.get('WEKO_SEARCH_FIX_ACCESSRIGHTS', False): + if current_app.config.get('WEKO_SEARCH_FIX_ACCESSRIGHTS', False): + now = datetime.now().isoformat() + rq = range_query(now, _date_from, _date_until) + if rq is not None: + post_filter['bool']['must'].append(rq.to_dict()) + else: post_filter['bool']['must'].append({ "range": { "_updated": { @@ -273,11 +278,6 @@ def _get_index_search_query(_date_from: str, _date_until: str) -> dict: } } }) - else: - now = datetime.now().isoformat() - rq = range_query(now, _date_from, _date_until) - if rq is not None: - post_filter['bool']['must'].append(rq.to_dict()) # create search query try: query_q = json.dumps(query_q).replace("@index", q) @@ -294,6 +294,11 @@ def _get_index_search_query(_date_from: str, _date_until: str) -> dict: } }) if not current_app.config.get('WEKO_SEARCH_FIX_ACCESSRIGHTS', False): + now = datetime.now().isoformat() + rq = range_query(now, _date_from, _date_until) + if rq is not None: + post_filter['bool']['must'].append(rq.to_dict()) + else: post_filter['bool']['must'].append({ "range": { "_updated": { @@ -302,11 +307,6 @@ def _get_index_search_query(_date_from: str, _date_until: str) -> dict: } } }) - else: - now = datetime.now().isoformat() - rq = range_query(now, _date_from, _date_until) - if rq is not None: - post_filter['bool']['must'].append(rq.to_dict()) # create search query wild_card = [] child_list = Indexes.get_child_list(q) diff --git a/modules/weko-search-ui/weko_search_ui/query.py b/modules/weko-search-ui/weko_search_ui/query.py index b4cedb244c..00dedc0c67 100644 --- a/modules/weko-search-ui/weko_search_ui/query.py +++ b/modules/weko-search-ui/weko_search_ui/query.py @@ -23,7 +23,7 @@ import json import re import sys -from datetime import datetime, timezone +from datetime import datetime from functools import partial from elasticsearch_dsl.query import Bool, Q @@ -614,7 +614,7 @@ def __get_accessrights_query(params): weko_search_fix_accessrights = current_app.config.get( 'WEKO_SEARCH_FIX_ACCESSRIGHTS', False ) - + accessrights_value = params.get('accessrights') if not accessrights_value: return None @@ -1521,13 +1521,15 @@ def _split_text_by_or(text): return split_text_list def range_query(now, _from=None, _until=None): - """ - Generate a search query considering update date changes. + """Generate a search query considering update date changes. - :param now: Current time (str) - :param _from: Lower bound of update date (str or None) - :param _until: Upper bound of update date (str or None) - :return: elasticsearch_dsl.query.Q or None + Args: + now (str): Current time. + _from (str or None): Lower bound of update date. + _until (str or None): Upper bound of update date. + + Returns: + elasticsearch_dsl.query.Q or None: The generated query object, or None if no range is specified. """ if _from is None and _until is None: return None @@ -1594,4 +1596,4 @@ def range_query(now, _from=None, _until=None): should2 = Q('bool', must=must2) # Overall should - return Q('bool', should=[should1, should2]) \ No newline at end of file + return Q('bool', should=[should1, should2], minimum_should_match=1) From 9bdcb2f829dd3cb07fd07804db9e6cdba96546f8 Mon Sep 17 00:00:00 2001 From: "kenji.shiokawa" Date: Mon, 13 Apr 2026 17:48:34 +0900 Subject: [PATCH 19/28] enbargoed fix updated unit test --- .../invenio_oaiserver/query.py | 154 +++++++++++++++- modules/invenio-oaiserver/tests/test_query.py | 166 +++++++++++++++--- .../invenio-records/invenio_records/api.py | 2 + modules/invenio-records/tests/test_api.py | 52 ++++++ .../invenio_resourcesyncserver/query.py | 11 +- .../tests/test_query.py | 69 +++++++- .../weko-search-ui/weko_search_ui/query.py | 78 -------- 7 files changed, 416 insertions(+), 116 deletions(-) diff --git a/modules/invenio-oaiserver/invenio_oaiserver/query.py b/modules/invenio-oaiserver/invenio_oaiserver/query.py index 7aaab5d2ab..336d50bc3f 100644 --- a/modules/invenio-oaiserver/invenio_oaiserver/query.py +++ b/modules/invenio-oaiserver/invenio_oaiserver/query.py @@ -20,7 +20,6 @@ from weko_index_tree.models import Index from weko_schema_ui.models import PublishStatus from werkzeug.utils import cached_property, import_string -from weko_search_ui.query import range_query from . import current_oaiserver @@ -197,8 +196,7 @@ def get_descendant_ids(index_id): if current_app.config.get('WEKO_SEARCH_FIX_ACCESSRIGHTS', False): if 'from_' in kwargs or 'until' in kwargs: - now = datetime.now().isoformat() - rq = range_query(now, kwargs.get('from_'), kwargs.get('until')) + rq = range_query(kwargs.get('from_'), kwargs.get('until')) if rq is not None: search = search.filter(rq) else: @@ -296,3 +294,153 @@ def items(self): } return Pagination(response) + +def range_query(_from=None, _until=None): + """Generate a search query considering update date changes. + + Args: + _from (str or None): Lower bound of update date. + _until (str or None): Upper bound of update date. + + Returns: + elasticsearch_dsl.query.Q or None: The generated query object, or None if no range is specified. + """ + if _from is None and _until is None: + return None + + # First should condition + + must_not_embargoed = Q( + 'bool', must_not=[Q('term', accessRights='embargoed access')] + ) + must_not_content_accessrole = Q( + 'bool', must_not=[ + Q( + 'nested', + path='content', + query=Q('exists', field='content.accessrole.raw') + ) + ] + ) + must_not_open_access = Q( + 'nested', + path='content', + query=Q( + 'bool', + must_not=[ + Q('term', **{'content.accessrole.raw': 'open_access'}), + Q( + 'bool', + must=[ + Q('term', **{'content.accessrole.raw': 'open_date'}), + Q('range', **{'content.date.dateValue.raw': {'lte': 'now'}}) + ] + ) + ] + ) + ) + should1 = Q( + 'bool', + must=[ + Q( + 'bool', + should=[ + must_not_embargoed, + must_not_content_accessrole, + must_not_open_access + ] + ), + Q( + 'range', + **{ + '_updated': { + **({'gte': _from} if _from else {}), + **({'lte': _until} if _until else {}) + } + } + ) + ] + ) + + # Second should condition + must_not_open_access2 = Q( + 'nested', + path='content', + query=Q( + 'bool', + must_not=[ + Q('term', **{'content.accessrole.raw': 'open_access'}), + Q( + 'bool', + must=[ + Q('term', **{'content.accessrole.raw': 'open_date'}), + Q('range', **{'content.date.dateValue.raw': {'lte': 'now'}}) + ] + ) + ] + ) + ) + + # from condition + from_should = [] + if _from: + from_should.append( + Q( + 'nested', + path='content', + query=Q( + 'bool', + must=[ + Q('term', **{'content.accessrole.raw': 'open_date'}), + Q('range', **{'content.date.dateValue.raw': {'gte': _from}}) + ] + ) + ) + ) + from_should.append(Q('range', **{'_updated': {'gte': _from}})) + + # until condition + until_must = [] + if _until: + until_must.append( + Q( + 'bool', + must_not=[ + Q( + 'nested', + path='content', + query=Q( + 'bool', + must=[ + Q('term', **{'content.accessrole.raw': 'open_date'}), + Q('range', **{'content.date.dateValue.raw': {'gt': _until}}) + ] + ) + ) + ] + ) + ) + until_must.append(Q('range', **{'_updated': {'lte': _until}})) + + must2 = [ + Q('term', accessRights='embargoed access'), + Q( + 'nested', + path='content', + query=Q('exists', field='content.accessrole.raw') + ), + Q('bool', must_not=[must_not_open_access2]) + ] + if from_should: + must2.append(Q('bool', should=from_should)) + if until_must: + must2.extend(until_must) + + should2 = Q('bool', must=must2) + + # Overall should + return Q( + 'bool', + should=[should1, should2], + minimum_should_match=1 + ) \ No newline at end of file diff --git a/modules/invenio-oaiserver/tests/test_query.py b/modules/invenio-oaiserver/tests/test_query.py index 63c18b2515..11c377689a 100644 --- a/modules/invenio-oaiserver/tests/test_query.py +++ b/modules/invenio-oaiserver/tests/test_query.py @@ -1,4 +1,3 @@ - import pytest from mock import patch import uuid @@ -15,7 +14,8 @@ from invenio_oaiserver.query import ( query_string_parser, get_affected_records, - get_records + get_records, + range_query ) # .tox/c1/bin/pytest --cov=invenio_oaiserver tests/test_query.py -vv -s --cov-branch --cov-report=term --cov-report=html --basetemp=/code/modules/invenio-oaiserver/.tox/c1/tmp @@ -28,7 +28,7 @@ def test_query_string_parser(es_app): assert type(result) == QueryString assert result.name == "query_string" assert result.to_dict() == {"query_string":{"query":"test_path"}} - + # current_oaiserver not have query_parse, config is not str current_app.config.update(OAISERVER_QUERY_PARSER=Q) delattr(current_oaiserver,"query_parser") @@ -36,7 +36,7 @@ def test_query_string_parser(es_app): assert type(result) == QueryString assert result.name == "query_string" assert result.to_dict() == {"query_string":{"query":"test_path"}} - + # current_oaiserver have query_parse result = query_string_parser("test_path") assert type(result) == QueryString @@ -53,23 +53,23 @@ def test_get_affected_records(es_app): result = get_affected_records(None,None) for i in result: pass - + spec="1671155386910" search_path = 'path:"1671155386910"' # exist spec, not exist search_path result = get_affected_records(spec,None) for i in result: assert i - + # not exist spec, exist search_path result = get_affected_records(None,search_path) for i in result: assert i - + result = get_affected_records(spec,search_path) for i in result: assert i - + #def get_records(**kwargs): # .tox/c1/bin/pytest --cov=invenio_oaiserver tests/test_query.py::test_get_records -vv -s --cov-branch --cov-report=term --basetemp=/code/modules/invenio-oaiserver/.tox/c1/tmp @@ -102,9 +102,9 @@ def test_get_records(es_app,db, mock_execute): db.session.add_all(indexes) db.session.add(rec1) db.session.add(rec2) - + db.session.commit() - + es_info = dict(id=str(rec_uuid1), index=current_app.config['INDEXER_DEFAULT_INDEX'], doc_type=current_app.config['INDEXER_DEFAULT_DOCTYPE']) @@ -119,21 +119,21 @@ def test_get_records(es_app,db, mock_execute): version_type='external_gte', body=rec_data2) current_search_client.index(**{**es_info,**body}) - + # not scroll_id, ":" not in set data = { "set":"12345" } result = get_records(**data) assert result - + # not scroll_id, ":" in set data = { "set":"12345:6789" } result = get_records(**data) assert result - + # not scroll_id, "set" not in data, exist "from_","until" in data data = { "from_":"2022-01-01", @@ -141,7 +141,7 @@ def test_get_records(es_app,db, mock_execute): } result = get_records(**data) assert result - + # in scroll_id data = { "resumptionToken":{"page":1,"scroll_id":"DXF1ZXJ5QW5kRmV0Y2gBAAAAAAAAVfgWYmVhQ3BkbEdSSm0wS3pTaEdQeHQ1QQ=="} @@ -201,7 +201,7 @@ def test_get_records_with_set(es_app,db, users): browsing_role="3,-99" ) ) - + rec_uuid1 = uuid.uuid4() rec_data1 = {"title":["test_item1"], "path":["123"], @@ -212,7 +212,7 @@ def test_get_records_with_set(es_app,db, users): "_updated": "2022-01-01T00:00:00" } rec1 = RecordMetadata(id=rec_uuid1,json=rec_data1) - + rec_uuid2 = uuid.uuid4() rec_data2 = {"title":["test_item2"], "path":["456"], @@ -223,7 +223,7 @@ def test_get_records_with_set(es_app,db, users): "_updated": "2022-01-01T00:00:00" } rec2 = RecordMetadata(id=rec_uuid2,json=rec_data2) - + rec_uuid3 = uuid.uuid4() rec_data3 = {"title":["test_item3"], "path":["789"], @@ -234,13 +234,13 @@ def test_get_records_with_set(es_app,db, users): "_updated": "2022-01-01T00:00:00" } rec3 = RecordMetadata(id=rec_uuid3,json=rec_data3) - + db.session.add_all(indexes) db.session.add(rec1) db.session.add(rec2) db.session.add(rec3) db.session.commit() - + es_info = dict(index=current_app.config['INDEXER_DEFAULT_INDEX'], doc_type=current_app.config['INDEXER_DEFAULT_DOCTYPE'], version=1, @@ -252,14 +252,14 @@ def test_get_records_with_set(es_app,db, users): current_search_client.index(**es_info,**body1) current_search_client.index(**es_info,**body2) current_search_client.index(**es_info,**body3) - + comm1 = Community.create(community_id="test_comm", role_id=users[0]["id"], id_user=users[0]["id"], title="test community", description="this is test community", root_node_id=indexes[0].id) db.session.add(comm1) db.session.commit() - + data = {"set":"123"} result = get_records(**data) assert result.total == 3 @@ -267,20 +267,20 @@ def test_get_records_with_set(es_app,db, users): assert result_items[0]["json"]["_source"] == rec_data1 assert result_items[1]["json"]["_source"] == rec_data2 assert result_items[2]["json"]["_source"] == rec_data3 - + data = {"set":"123:456"} result = get_records(**data) assert result.total == 2 result_items = [r for r in result.items] assert result_items[0]["json"]["_source"] == rec_data2 assert result_items[1]["json"]["_source"] == rec_data3 - + data = {"set":"123:456:789"} result = get_records(**data) assert result.total == 1 result_items = [r for r in result.items] assert result_items[0]["json"]["_source"] == rec_data3 - + data = {"set":"user-test_comm"} result = get_records(**data) assert result.total == 3 @@ -288,7 +288,7 @@ def test_get_records_with_set(es_app,db, users): assert result_items[0]["json"]["_source"] == rec_data1 assert result_items[1]["json"]["_source"] == rec_data2 assert result_items[2]["json"]["_source"] == rec_data3 - + data = {"set":"test_comm"} result = get_records(**data) assert result.total == 3 @@ -296,11 +296,123 @@ def test_get_records_with_set(es_app,db, users): assert result_items[0]["json"]["_source"] == rec_data1 assert result_items[1]["json"]["_source"] == rec_data2 assert result_items[2]["json"]["_source"] == rec_data3 - + data = {"set":"999"} result = get_records(**data) assert result.total == 0 - + data = {"set":"aaa"} result = get_records(**data) assert result.total == 0 + +# .tox/c1/bin/pytest --cov=invenio_oaiserver tests/test_query.py::test_range_query -vv -s --cov-branch --cov-report=term --basetemp=/code/modules/invenio-oaiserver/.tox/c1/tmp +def test_range_query(): + # Case: both _from and _until are None + result = range_query(None, None) + assert result is None + + # Case: only _from is specified + _from = '2026-01-01' + result = range_query(_from, None) + assert result is not None + d = result.to_dict() + # Check that 'gte' appears in should2 (from_should is active) + should2 = d['bool']['should'][1]['bool']['must'] + assert any('gte' in str(x) for x in should2) + + # Case: only _until is specified + _until = '2026-12-31' + result = range_query(None, _until) + assert result is not None + d = result.to_dict() + # Check that 'lte' appears in should2 (until_must is active) + should2 = d['bool']['should'][1]['bool']['must'] + assert any('lte' in str(x) for x in should2) + + # Case: both _from and _until are specified + _from = '2026-01-01' + _until = '2026-12-31' + result = range_query(_from, _until) + assert result is not None + d = result.to_dict() + should2 = d['bool']['should'][1]['bool']['must'] + # Both from_should and until_must should be active + assert any('gte' in str(x) for x in should2) + assert any('lte' in str(x) for x in should2) + +@pytest.mark.parametrize("fix_access, from_, until, expect_range, expect_rq", [ + (False, None, None, False, False), + (False, "2026-01-01", None, True, False), + (False, None, "2026-12-31", True, False), + (False, "2026-01-01", "2026-12-31", True, False), + (True, None, None, False, False), + (True, "2026-01-01", None, True, True), + (True, None, "2026-12-31", True, True), + (True, "2026-01-01", "2026-12-31", True, True), +]) +# .tox/c1/bin/pytest --cov=invenio_oaiserver tests/test_query.py::test_get_records_range_branch -v -s -vv --cov-branch --cov-report=term --cov-config=tox.ini --basetemp=/code/modules/invenio-oaiserver/.tox/c1/tmp +def test_get_records_range_branch(es_app, db, monkeypatch, fix_access, from_, until, expect_range, expect_rq): + es_app.config['WEKO_SEARCH_FIX_ACCESSRIGHTS'] = fix_access + + index = Index( + id=30, + parent=0, + position=1, + index_name_english="range_index", + index_link_name_english="range_index_link", + harvest_public_state=True, + public_state=True, + public_date=datetime(2100,1,1), + browsing_role="3,-99" + ) + db.session.add(index) + rec_uuid = uuid.uuid4() + PersistentIdentifier.create('doi', "https://doi.org/00030", object_type='rec', object_uuid=rec_uuid, status=PIDStatus.REGISTERED) + rec_data = { + "title": ["range_item"], + "path": ["30"], + "_oai": {"id": "oai:test:00030", "sets": ["30"]}, + "set": ["30"], + "relation_version_is_last": "true", + "control_number": "30", + "publish_status": "0", + "_updated": "2100-01-01T00:00:00" + } + rec = RecordMetadata(id=rec_uuid, json=rec_data) + db.session.add(rec) + db.session.commit() + es_info = dict( + id=str(rec_uuid), + index=es_app.config['INDEXER_DEFAULT_INDEX'], + doc_type=es_app.config['INDEXER_DEFAULT_DOCTYPE'], + refresh="wait_for" + ) + body = dict(version=1, version_type="external_gte", body=rec_data) + from invenio_search import current_search_client + current_search_client.index(**{**es_info, **body}) + current_search_client.indices.refresh(index=es_app.config['INDEXER_DEFAULT_INDEX']) + + called = {"filter": False, "rq": False} + import invenio_oaiserver.query as query_mod + + orig_filter = query_mod.OAIServerSearch.filter + def filter_spy(self, *args, **kwargs): + called["filter"] = True + return orig_filter(self, *args, **kwargs) + monkeypatch.setattr(query_mod.OAIServerSearch, "filter", filter_spy) + + orig_rq = query_mod.range_query + def rq_spy(_from, _until): + called["rq"] = True + return orig_rq(_from, _until) + monkeypatch.setattr(query_mod, "range_query", rq_spy) + + with es_app.app_context(): + kwargs = {"set": "30"} + if from_: + kwargs["from_"] = from_ + if until: + kwargs["until"] = until + get_records(**kwargs) + assert called["filter"] == expect_range + assert called["rq"] == expect_rq diff --git a/modules/invenio-records/invenio_records/api.py b/modules/invenio-records/invenio_records/api.py index 64e7f644b6..95c59eae09 100644 --- a/modules/invenio-records/invenio_records/api.py +++ b/modules/invenio-records/invenio_records/api.py @@ -380,6 +380,8 @@ def revisions(self): @property def updated(self): + if not self.model: + return None original_updated = self.model.updated if not current_app.config.get('WEKO_SEARCH_FIX_ACCESSRIGHTS', False): return original_updated diff --git a/modules/invenio-records/tests/test_api.py b/modules/invenio-records/tests/test_api.py index 0298c35caa..1c0b131bdd 100644 --- a/modules/invenio-records/tests/test_api.py +++ b/modules/invenio-records/tests/test_api.py @@ -392,3 +392,55 @@ def test_validate_partial(app, db): record['a'] = 1 with pytest.raises(ValidationError) as exc_info: record.commit(validator=PartialDraft4Validator) + +@pytest.mark.parametrize("fix_accessrights, updated, meta_patch, expected", [ + # WEKO_SEARCH_FIX_ACCESSRIGHTS=True + (True, None, {}, None), + (True, datetime(2026, 3, 1, 0, 0, 0), None, datetime(2026, 3, 1, 0, 0, 0)), + (True, datetime(2026, 3, 1, 0, 0, 0), {"item_type_id": None}, datetime(2026, 3, 1, 0, 0, 0)), + (True, datetime(2026, 3, 1, 0, 0, 0), {"item_type_id": 1}, datetime(2026, 3, 1, 0, 0, 0)), + (True, datetime(2026, 3, 1, 0, 0, 0), {"item_type_id": 1}, datetime(2026, 3, 1, 0, 0, 0)), + (True, datetime(2026, 3, 1, 0, 0, 0), {"item_type_id": 1}, datetime(2026, 3, 1, 0, 0, 0)), + (True, datetime(2026, 3, 1, 0, 0, 0), {"item_type_id": 1, "item_1736148125517": {"attribute_type": "file", "attribute_value_mlt": []}}, datetime(2026, 3, 1, 0, 0, 0)), + (True, None, {"item_type_id": 1, "item_1736148125517": {"attribute_type": "file", "attribute_value_mlt": []}}, None), + (True, datetime(2027, 1, 1, 0, 0, 0), {"item_type_id": 1, "item_1736148125517": {"attribute_type": "file", "attribute_value_mlt": [{"date": [{"dateType": "Available", "dateValue": "2026-12-31"}], "accessrole": "open_date"}]}}, datetime(2027, 1, 1, 0, 0, 0)), + (True, datetime(2026, 3, 1, 0, 0, 0), {"item_type_id": 1, "item_1736148125517": {"attribute_type": "file", "attribute_value_mlt": [{"date": [{"dateType": "Available", "dateValue": "2027-01-01"}], "accessrole": "open_date"}]}}, datetime(2026, 3, 1, 0, 0, 0)), + (True, datetime(2026, 3, 1, 0, 0, 0), {"item_type_id": 1, "item_1736148125517": {"attribute_type": "file", "attribute_value_mlt": [{"date": [{"dateType": "Available", "dateValue": "2027-01-01"}], "accessrole": "open_access"}]}}, datetime(2026, 3, 1, 0, 0, 0)), + + # WEKO_SEARCH_FIX_ACCESSRIGHTS=False + (False, None, {}, None), + (False, datetime(2026, 3, 1, 0, 0, 0), None, datetime(2026, 3, 1, 0, 0, 0)), + (False, datetime(2026, 3, 1, 0, 0, 0), {"item_type_id": None}, datetime(2026, 3, 1, 0, 0, 0)), + (False, datetime(2026, 3, 1, 0, 0, 0), {"item_type_id": 1}, datetime(2026, 3, 1, 0, 0, 0)), + (False, datetime(2026, 3, 1, 0, 0, 0), {"item_type_id": 1}, datetime(2026, 3, 1, 0, 0, 0)), + (False, datetime(2026, 3, 1, 0, 0, 0), {"item_type_id": 1}, datetime(2026, 3, 1, 0, 0, 0)), + (False, datetime(2026, 3, 1, 0, 0, 0), {"item_type_id": 1, "item_1736148125517": {"attribute_type": "file", "attribute_value_mlt": []}}, datetime(2026, 3, 1, 0, 0, 0)), + (False, None, {"item_type_id": 1, "item_1736148125517": {"attribute_type": "file", "attribute_value_mlt": []}}, None), + (False, datetime(2027, 1, 1, 0, 0, 0), {"item_type_id": 1, "item_1736148125517": {"attribute_type": "file", "attribute_value_mlt": [{"date": [{"dateType": "Available", "dateValue": "2026-12-31"}], "accessrole": "open_date"}]}}, datetime(2027, 1, 1, 0, 0, 0)), + (False, datetime(2026, 3, 1, 0, 0, 0), {"item_type_id": 1, "item_1736148125517": {"attribute_type": "file", "attribute_value_mlt": [{"date": [{"dateType": "Available", "dateValue": "2027-01-01"}], "accessrole": "open_date"}]}}, datetime(2026, 3, 1, 0, 0, 0)), +]) +# .tox/c1/bin/pytest --cov=invenio_records tests/test_api.py::test_record_updated_with_real_metadata -vv -s --cov-branch --cov-report=term --basetemp=/code/modules/invenio-records/.tox/c1/tmp +def test_record_updated_with_real_metadata(app, monkeypatch, fix_accessrights, updated, meta_patch, expected): + base_meta = { + "item_type_id": 1 + } + meta = copy.deepcopy(base_meta) + if meta_patch is not None: + meta.update(copy.deepcopy(meta_patch)) + else: + meta = None + + monkeypatch.setattr("weko_records.serializers.utils.get_mapping", lambda i, t: {"accessRights.@value": "dummy_path"}) + # monkeypatch.setattr("weko_records.utils.check_embargo_rights", lambda a, t, d: (True, "open access")) + + with app.app_context(): + from flask import current_app + current_app.config["WEKO_SEARCH_FIX_ACCESSRIGHTS"] = fix_accessrights + record = Record({}) + class DummyModel: + def __init__(self, updated, json): + self.updated = updated + self.json = json + record.model = DummyModel(updated=updated, json=meta) + result = record.updated + assert result == expected diff --git a/modules/invenio-resourcesyncserver/invenio_resourcesyncserver/query.py b/modules/invenio-resourcesyncserver/invenio_resourcesyncserver/query.py index 96baff8d59..9f4fdc17b1 100644 --- a/modules/invenio-resourcesyncserver/invenio_resourcesyncserver/query.py +++ b/modules/invenio-resourcesyncserver/invenio_resourcesyncserver/query.py @@ -28,7 +28,7 @@ from weko_index_tree.api import Indexes from weko_schema_ui.models import PublishStatus from weko_search_ui.utils import execute_search_with_pagination -from weko_search_ui.query import range_query +from invenio_oaiserver.query import range_query from datetime import datetime from .config import WEKO_ROOT_INDEX @@ -265,8 +265,7 @@ def _get_index_search_query(_date_from: str, _date_until: str) -> dict: } } if current_app.config.get('WEKO_SEARCH_FIX_ACCESSRIGHTS', False): - now = datetime.now().isoformat() - rq = range_query(now, _date_from, _date_until) + rq = range_query(_date_from, _date_until) if rq is not None: post_filter['bool']['must'].append(rq.to_dict()) else: @@ -293,9 +292,8 @@ def _get_index_search_query(_date_from: str, _date_until: str) -> dict: "path": list_path } }) - if not current_app.config.get('WEKO_SEARCH_FIX_ACCESSRIGHTS', False): - now = datetime.now().isoformat() - rq = range_query(now, _date_from, _date_until) + if current_app.config.get('WEKO_SEARCH_FIX_ACCESSRIGHTS', False): + rq = range_query(_date_from, _date_until) if rq is not None: post_filter['bool']['must'].append(rq.to_dict()) else: @@ -307,6 +305,7 @@ def _get_index_search_query(_date_from: str, _date_until: str) -> dict: } } }) + # create search query wild_card = [] child_list = Indexes.get_child_list(q) diff --git a/modules/invenio-resourcesyncserver/tests/test_query.py b/modules/invenio-resourcesyncserver/tests/test_query.py index 20a423e029..7ac48ecd58 100644 --- a/modules/invenio-resourcesyncserver/tests/test_query.py +++ b/modules/invenio-resourcesyncserver/tests/test_query.py @@ -6,7 +6,7 @@ import datetime from elasticsearch import helpers from mock import patch, MagicMock, Mock -from flask import current_app, make_response, request +from flask import current_app, make_response, request, Flask from flask_login import current_user from flask_babelex import Babel @@ -68,7 +68,7 @@ def test_item_path_search_factory(i18n_app, indices): with patch("weko_index_tree.api.Indexes.get_list_path_publish", return_value="test"): with patch("weko_index_tree.api.Indexes.get_child_list", return_value=[MagicMock()]): assert item_path_search_factory(data_1, index_id="Root Index") - + assert item_path_search_factory(data_1, index_id=33) @@ -81,3 +81,68 @@ def test_item_changes_search_factory(i18n_app, indices): with patch("weko_index_tree.api.Indexes.get_list_path_publish", return_value="test"): with patch("weko_index_tree.api.Indexes.get_child_list", return_value=[MagicMock()]): assert item_changes_search_factory(search, index_id="Root Index") + +@pytest.mark.parametrize("fix_access, is_root, expect_range, expect_updated", [ + (True, False, True, False), # Non-root, fix_access=True → range_query is used + (False, False, False, True), # Non-root, fix_access=False → _updated range is used + (True, True, True, False), # Root, fix_access=True → range_query is used + (False, True, False, True), # Root, fix_access=False → _updated range is used +]) +# .tox/c1/bin/pytest --cov=invenio_resourcesyncserver tests/test_query.py::test_item_changes_search_factory_branch -v -s -vv --cov-branch --cov-report=term --cov-config=tox.ini --basetemp=/code/modules/invenio-resourcesyncserver/.tox/c1/tmp +def test_item_changes_search_factory_branch(monkeypatch, fix_access, is_root, expect_range, expect_updated): + # Flask Apps and Config + app = Flask(__name__) + app.config['WEKO_SEARCH_FIX_ACCESSRIGHTS'] = fix_access + app.config['WEKO_ROOT_INDEX'] = 0 + + # mock + class DummySearch: + def __init__(self): + self.query = None + def update_from_dict(self, q): + self.query = q + + class DummyItem: + cid = 'dummy' + + class DummyIndexes: + @staticmethod + def get_list_path_publish(index_id): + return [1, 2, 3] + @staticmethod + def get_child_list(q): + return [DummyItem()] + + called = {'range_query': False} + def fake_range_query(*args, **kwargs): + called['range_query'] = True + class DummyRQ: + def to_dict(self): + return {'RANGE_QUERY': True} + return DummyRQ() + + # Get the modules to be imported + from invenio_resourcesyncserver import query as query_mod + monkeypatch.setattr(query_mod, "range_query", fake_range_query) + monkeypatch.setattr(query_mod, "Indexes", DummyIndexes) + + with app.app_context(): + search = DummySearch() + index_id = 0 if is_root else 1 + result = query_mod.item_changes_search_factory( + search, index_id=index_id, date_from='2020-01-01', date_until='2020-12-31' + ) + q = result.query + if expect_range: + assert any( + (isinstance(m, dict) and 'RANGE_QUERY' in m) or + (hasattr(m, 'to_dict') and 'RANGE_QUERY' in m.to_dict()) + for m in q['post_filter']['bool']['must'] + ) + assert called['range_query'] + if expect_updated: + assert any( + (isinstance(m, dict) and '_updated' in m.get('range', {})) or + (hasattr(m, 'to_dict') and '_updated' in m.to_dict().get('range', {})) + for m in q['post_filter']['bool']['must'] + ) diff --git a/modules/weko-search-ui/weko_search_ui/query.py b/modules/weko-search-ui/weko_search_ui/query.py index 00dedc0c67..fac34f7bea 100644 --- a/modules/weko-search-ui/weko_search_ui/query.py +++ b/modules/weko-search-ui/weko_search_ui/query.py @@ -1519,81 +1519,3 @@ def _split_text_by_or(text): split_text_list = re.split(pattern, text) split_text_list = [item.strip() for item in split_text_list] return split_text_list - -def range_query(now, _from=None, _until=None): - """Generate a search query considering update date changes. - - Args: - now (str): Current time. - _from (str or None): Lower bound of update date. - _until (str or None): Upper bound of update date. - - Returns: - elasticsearch_dsl.query.Q or None: The generated query object, or None if no range is specified. - """ - if _from is None and _until is None: - return None - - # First should condition - must_not_embargoed = Q('bool', must_not=[Q('term', accessRights='embargoed access')]) - must_not_content_accessrole = Q('bool', must_not=[Q('nested', path='content', query=Q('exists', field='content.accessrole.raw'))]) - must_not_open_access = Q('nested', path='content', query=Q('bool', must_not=[ - Q('term', **{'content.accessrole.raw': 'open_access'}), - Q('bool', must=[ - Q('term', **{'content.accessrole.raw': 'open_date'}), - Q('range', **{'content.date.dateValue.raw': {'lte': now}}) - ]) - ])) - should1 = Q('bool', must=[ - Q('bool', should=[must_not_embargoed, must_not_content_accessrole, must_not_open_access]), - Q('range', **{ - '_updated': { - **({'gte': _from} if _from else {}), - **({'lte': _until} if _until else {}) - } - }) - ]) - - # Second should condition - must_not_open_access2 = Q('nested', path='content', query=Q('bool', must_not=[ - Q('term', **{'content.accessrole.raw': 'open_access'}), - Q('bool', must=[ - Q('term', **{'content.accessrole.raw': 'open_date'}), - Q('range', **{'content.date.dateValue.raw': {'lte': now}}) - ]) - ])) - - # from condition - from_should = [] - if _from: - from_should.append(Q('nested', path='content', query=Q('bool', must=[ - Q('term', **{'content.accessrole.raw': 'open_date'}), - Q('range', **{'content.date.dateValue.raw': {'gte': _from}}) - ]))) - from_should.append(Q('range', **{'_updated': {'gte': _from}})) - - # until condition - until_must = [] - if _until: - until_must.append(Q('bool', must_not=[ - Q('nested', path='content', query=Q('bool', must=[ - Q('term', **{'content.accessrole.raw': 'open_date'}), - Q('range', **{'content.date.dateValue.raw': {'gt': _until}}) - ])) - ])) - until_must.append(Q('range', **{'_updated': {'lte': _until}})) - - must2 = [ - Q('term', accessRights='embargoed access'), - Q('nested', path='content', query=Q('exists', field='content.accessrole.raw')), - Q('bool', must_not=[must_not_open_access2]) - ] - if from_should: - must2.append(Q('bool', should=from_should)) - if until_must: - must2.extend(until_must) - - should2 = Q('bool', must=must2) - - # Overall should - return Q('bool', should=[should1, should2], minimum_should_match=1) From 427f6b8934a3e52781adc5b921655f8b31a87c4c Mon Sep 17 00:00:00 2001 From: "kenji.shiokawa" Date: Wed, 15 Apr 2026 13:46:47 +0900 Subject: [PATCH 20/28] fix invenio_record test_api --- .../invenio-records/invenio_records/api.py | 3 - modules/invenio-records/tests/test_api.py | 292 ++++++++++++++++-- 2 files changed, 261 insertions(+), 34 deletions(-) diff --git a/modules/invenio-records/invenio_records/api.py b/modules/invenio-records/invenio_records/api.py index 95c59eae09..3dd739785b 100644 --- a/modules/invenio-records/invenio_records/api.py +++ b/modules/invenio-records/invenio_records/api.py @@ -450,9 +450,6 @@ def _get_nested_value(data, path): dt = datetime.combine(dt.date(), time(0, 0, 0)) open_dates.append(dt) - if not open_dates: - return original_updated - latest_open_date = max(open_dates) if original_updated: return max(original_updated, latest_open_date) diff --git a/modules/invenio-records/tests/test_api.py b/modules/invenio-records/tests/test_api.py index 1c0b131bdd..243f4b05ac 100644 --- a/modules/invenio-records/tests/test_api.py +++ b/modules/invenio-records/tests/test_api.py @@ -393,34 +393,266 @@ def test_validate_partial(app, db): with pytest.raises(ValidationError) as exc_info: record.commit(validator=PartialDraft4Validator) -@pytest.mark.parametrize("fix_accessrights, updated, meta_patch, expected", [ - # WEKO_SEARCH_FIX_ACCESSRIGHTS=True - (True, None, {}, None), - (True, datetime(2026, 3, 1, 0, 0, 0), None, datetime(2026, 3, 1, 0, 0, 0)), - (True, datetime(2026, 3, 1, 0, 0, 0), {"item_type_id": None}, datetime(2026, 3, 1, 0, 0, 0)), - (True, datetime(2026, 3, 1, 0, 0, 0), {"item_type_id": 1}, datetime(2026, 3, 1, 0, 0, 0)), - (True, datetime(2026, 3, 1, 0, 0, 0), {"item_type_id": 1}, datetime(2026, 3, 1, 0, 0, 0)), - (True, datetime(2026, 3, 1, 0, 0, 0), {"item_type_id": 1}, datetime(2026, 3, 1, 0, 0, 0)), - (True, datetime(2026, 3, 1, 0, 0, 0), {"item_type_id": 1, "item_1736148125517": {"attribute_type": "file", "attribute_value_mlt": []}}, datetime(2026, 3, 1, 0, 0, 0)), - (True, None, {"item_type_id": 1, "item_1736148125517": {"attribute_type": "file", "attribute_value_mlt": []}}, None), - (True, datetime(2027, 1, 1, 0, 0, 0), {"item_type_id": 1, "item_1736148125517": {"attribute_type": "file", "attribute_value_mlt": [{"date": [{"dateType": "Available", "dateValue": "2026-12-31"}], "accessrole": "open_date"}]}}, datetime(2027, 1, 1, 0, 0, 0)), - (True, datetime(2026, 3, 1, 0, 0, 0), {"item_type_id": 1, "item_1736148125517": {"attribute_type": "file", "attribute_value_mlt": [{"date": [{"dateType": "Available", "dateValue": "2027-01-01"}], "accessrole": "open_date"}]}}, datetime(2026, 3, 1, 0, 0, 0)), - (True, datetime(2026, 3, 1, 0, 0, 0), {"item_type_id": 1, "item_1736148125517": {"attribute_type": "file", "attribute_value_mlt": [{"date": [{"dateType": "Available", "dateValue": "2027-01-01"}], "accessrole": "open_access"}]}}, datetime(2026, 3, 1, 0, 0, 0)), - - # WEKO_SEARCH_FIX_ACCESSRIGHTS=False - (False, None, {}, None), - (False, datetime(2026, 3, 1, 0, 0, 0), None, datetime(2026, 3, 1, 0, 0, 0)), - (False, datetime(2026, 3, 1, 0, 0, 0), {"item_type_id": None}, datetime(2026, 3, 1, 0, 0, 0)), - (False, datetime(2026, 3, 1, 0, 0, 0), {"item_type_id": 1}, datetime(2026, 3, 1, 0, 0, 0)), - (False, datetime(2026, 3, 1, 0, 0, 0), {"item_type_id": 1}, datetime(2026, 3, 1, 0, 0, 0)), - (False, datetime(2026, 3, 1, 0, 0, 0), {"item_type_id": 1}, datetime(2026, 3, 1, 0, 0, 0)), - (False, datetime(2026, 3, 1, 0, 0, 0), {"item_type_id": 1, "item_1736148125517": {"attribute_type": "file", "attribute_value_mlt": []}}, datetime(2026, 3, 1, 0, 0, 0)), - (False, None, {"item_type_id": 1, "item_1736148125517": {"attribute_type": "file", "attribute_value_mlt": []}}, None), - (False, datetime(2027, 1, 1, 0, 0, 0), {"item_type_id": 1, "item_1736148125517": {"attribute_type": "file", "attribute_value_mlt": [{"date": [{"dateType": "Available", "dateValue": "2026-12-31"}], "accessrole": "open_date"}]}}, datetime(2027, 1, 1, 0, 0, 0)), - (False, datetime(2026, 3, 1, 0, 0, 0), {"item_type_id": 1, "item_1736148125517": {"attribute_type": "file", "attribute_value_mlt": [{"date": [{"dateType": "Available", "dateValue": "2027-01-01"}], "accessrole": "open_date"}]}}, datetime(2026, 3, 1, 0, 0, 0)), -]) -# .tox/c1/bin/pytest --cov=invenio_records tests/test_api.py::test_record_updated_with_real_metadata -vv -s --cov-branch --cov-report=term --basetemp=/code/modules/invenio-records/.tox/c1/tmp -def test_record_updated_with_real_metadata(app, monkeypatch, fix_accessrights, updated, meta_patch, expected): +@pytest.mark.parametrize( + "fix_accessrights, access_path, updated, meta_patch, expected", + [ + # Empty data + (True, None, None, {}, None), + + # Only updated is set + (True, None, datetime(2026, 3, 1, 0, 0, 0), None, datetime(2026, 3, 1, 0, 0, 0)), + + # item_type_id is None + (True, None, datetime(2026, 3, 1, 0, 0, 0), {"item_type_id": None}, datetime(2026, 3, 1, 0, 0, 0)), + + # Only item_type_id is set + (True, None, datetime(2026, 3, 1, 0, 0, 0), {"item_type_id": 1}, datetime(2026, 3, 1, 0, 0, 0)), + + # Empty file attribute + (True, None, datetime(2026, 3, 1, 0, 0, 0), { + "item_type_id": 1, + "item_1736146823660": { + "attribute_name": "アクセス権", + "attribute_value_mlt": [] + } + }, datetime(2026, 3, 1, 0, 0, 0)), + + # Empty file attribute (updated=None) + (True, None, None, {"item_type_id": 1, "item_foo": {"attribute_type": "file", "attribute_value_mlt": []}}, None), + + # open_date is in the future + (True, "item_1736146823660.attribute_value_mlt.subitem_access_right", datetime(2027, 1, 1, 0, 0, 0), { + "item_type_id": 1, + "item_1736146823660": { + "attribute_name": "アクセス権", + "attribute_value_mlt": [ + { + "subitem_access_right": "embargoed access", + "subitem_access_right_uri": "http://purl.org/coar/access_right/c_f1cf" + } + ] + }, + "item_1736148125517": { + "attribute_type": "file", + "attribute_value_mlt": [ + {"date": [{"dateType": "Available", "dateValue": "2026-12-31"}], "accessrole": "open_date"} + ] + } + }, datetime(2027, 1, 1, 0, 0, 0)), + + # open_date is in the past + (True, "item_1736146823660.attribute_value_mlt.subitem_access_right", datetime(2026, 3, 1, 0, 0, 0), { + "item_type_id": 1, + "item_1736146823660": { + "attribute_name": "アクセス権", + "attribute_value_mlt": [ + { + "subitem_access_right": "embargoed access", + "subitem_access_right_uri": "http://purl.org/coar/access_right/c_f1cf" + } + ] + }, + "item_1736148125517": { + "attribute_type": "file", + "attribute_value_mlt": [ + {"date": [{"dateType": "Available", "dateValue": "2026-01-01"}], "accessrole": "open_date"} + ] + } + }, datetime(2026, 3, 1, 0, 0, 0)), + + # open_access file + (True, "item_1736146823660.attribute_value_mlt.subitem_access_right", datetime(2026, 3, 1, 0, 0, 0), { + "item_type_id": 1, + "item_1736146823660": { + "attribute_name": "アクセス権", + "attribute_value_mlt": [ + { + "subitem_access_right": "embargoed access", + "subitem_access_right_uri": "http://purl.org/coar/access_right/c_f1cf" + } + ] + }, + "item_1736148125517": { + "attribute_type": "file", + "attribute_value_mlt": [ + {"date": [{"dateType": "Available", "dateValue": "2026-04-01"}], "accessrole": "open_access"} + ] + } + }, datetime(2026, 4, 1, 0, 0, 0)), + + # date exists but no accessrole + (True, "item_1736146823660.attribute_value_mlt.subitem_access_right", datetime(2026, 3, 1, 0, 0, 0), { + "item_type_id": 1, + "item_1736146823660": { + "attribute_name": "アクセス権", + "attribute_value_mlt": [ + { + "subitem_access_right": "embargoed access", + "subitem_access_right_uri": "http://purl.org/coar/access_right/c_f1cf" + } + ] + }, + "item_1736148125517": { + "attribute_type": "file", + "attribute_value_mlt": [ + {"date": [{"dateType": "Available", "dateValue": "2027-01-01"}]} + ] + } + }, datetime(2026, 3, 1, 0, 0, 0)), + + # accessrole exists but no date + (True, "item_1736146823660.attribute_value_mlt.subitem_access_right", datetime(2026, 3, 1, 0, 0, 0), { + "item_type_id": 1, + "item_1736146823660": { + "attribute_name": "アクセス権", + "attribute_value_mlt": [ + { + "subitem_access_right": "embargoed access", + "subitem_access_right_uri": "http://purl.org/coar/access_right/c_f1cf" + } + ] + }, + "item_1736148125517": { + "attribute_type": "file", + "attribute_value_mlt": [ + {"accessrole": "open_date"} + ] + } + }, datetime(2026, 3, 1, 0, 0, 0)), + + # Multiple open_dates (with access rights info) + (True, "item_1736146823660.attribute_value_mlt.subitem_access_right", datetime(2026, 3, 1, 0, 0, 0), { + "item_type_id": 1, + "item_1736146823660": { + "attribute_name": "アクセス権", + "attribute_value_mlt": [ + { + "subitem_access_right": "embargoed access", + "subitem_access_right_uri": "http://purl.org/coar/access_right/c_f1cf" + } + ] + }, + "item_1736148125517": { + "attribute_type": "file", + "attribute_value_mlt": [ + {"date": [{"dateType": "Available", "dateValue": "2026-03-31"}], "accessrole": "open_date"}, + {"date": [{"dateType": "Available", "dateValue": "2026-03-30"}], "accessrole": "open_date"} + ] + } + }, datetime(2026, 3, 31, 0, 0, 0)), + + # Multiple open_dates (with access rights info), original_updated > max(open_dates) + (True, "item_1736146823660.attribute_value_mlt.subitem_access_right", datetime(2026, 4, 2, 0, 0, 0), { + "item_type_id": 1, + "item_1736146823660": { + "attribute_name": "アクセス権", + "attribute_value_mlt": [ + { + "subitem_access_right": "embargoed access", + "subitem_access_right_uri": "http://purl.org/coar/access_right/c_f1cf" + } + ] + }, + "item_1736148125517": { + "attribute_type": "file", + "attribute_value_mlt": [ + {"date": [{"dateType": "Available", "dateValue": "2026-03-31"}], "accessrole": "open_date"}, + {"date": [{"dateType": "Available", "dateValue": "2026-03-30"}], "accessrole": "open_date"} + ] + } + }, datetime(2026, 4, 2, 0, 0, 0)), + # WEKO_SEARCH_FIX_ACCESSRIGHTS is False + (False, "item_1736146823660.attribute_value_mlt.subitem_access_right", datetime(2026, 3, 1, 0, 0, 0), { + "item_type_id": 1, + "item_1736146823660": { + "attribute_name": "アクセス権", + "attribute_value_mlt": [ + { + "subitem_access_right": "embargoed access", + "subitem_access_right_uri": "http://purl.org/coar/access_right/c_f1cf" + } + ] + }, + "item_1736148125517": { + "attribute_type": "file", + "attribute_value_mlt": [ + {"date": [{"dateType": "Available", "dateValue": "2026-03-31"}], "accessrole": "open_date"}, + {"date": [{"dateType": "Available", "dateValue": "2026-03-30"}], "accessrole": "open_date"} + ] + } + }, datetime(2026, 3, 1, 0, 0, 0)), + + # _get_nested_value list branch (does not contain subitem_access_right) + (True, "item_foo.subitem_access_right", datetime(2026, 3, 1, 0, 0, 0), { + "item_type_id": 1, + "item_foo": [ + {"not_target": 1} + ] + }, datetime(2026, 3, 1, 0, 0, 0)), + + #Case where attribute_mlt is not in the path + (True, "item_foo.subitem_access_right",datetime(2026, 3, 1, 0, 0, 0), { + "item_type_id": 1, + "item_foo": { + "attribute_type": "アクセス権", + "attribute_value_mlt": [ + {"not_target": 1}, + {"subitem_access_right": "embargoed access"} + ] + } + }, + datetime(2026, 3, 1, 0, 0, 0) + ), + + # Case where the path key does not exist + (True,"item_foo.subitem_access_right", datetime(2026, 3, 1, 0, 0, 0), { + "item_type_id": 1, + "item_foo": { + "attribute_type": "file", + "attribute_value_mlt": [ + {"not_target": 1} + ] + } + }, + datetime(2026, 3, 1, 0, 0, 0) + ), + + # Case where value is neither list nor dict + (True, "item_foo.subitem_access_right" ,datetime(2026, 3, 1, 0, 0, 0),{ + "item_type_id": 1, + "item_foo": { + "attribute_type": "アクセス権", + "attribute_value_mlt": "invalid" + } + }, + datetime(2026, 3, 1, 0, 0, 0) + ), + + # Case where updated is None but opendate exists + (True, "item_1736146823660.attribute_value_mlt.subitem_access_right" ,None, { + "item_type_id": 1, + "item_1736146823660": { + "attribute_name": "アクセス権", + "attribute_value_mlt": [ + { + "subitem_access_right": "embargoed access", + "subitem_access_right_uri": "http://purl.org/coar/access_right/c_f1cf" + } + ] + }, + "item_1736148125517": { + "attribute_type": "file", + "attribute_value_mlt": [ + {"date": [{"dateType": "Available", "dateValue": "2026-03-31"}], "accessrole": "open_date"} + ] + } + }, + datetime(2026, 3, 31, 0, 0, 0) + ), + ] +) +# .tox/c1/bin/pytest --cov=invenio_records tests/test_api.py::test_record_updated -vv -s --cov-branch --cov-report=term --basetemp=/code/modules/invenio-records/.tox/c1/tmp +def test_record_updated(app, monkeypatch, access_path, fix_accessrights, updated, meta_patch, expected): base_meta = { "item_type_id": 1 } @@ -430,9 +662,7 @@ def test_record_updated_with_real_metadata(app, monkeypatch, fix_accessrights, u else: meta = None - monkeypatch.setattr("weko_records.serializers.utils.get_mapping", lambda i, t: {"accessRights.@value": "dummy_path"}) - # monkeypatch.setattr("weko_records.utils.check_embargo_rights", lambda a, t, d: (True, "open access")) - + monkeypatch.setattr("weko_records.serializers.utils.get_mapping", lambda i, t: {"accessRights.@value": access_path}) with app.app_context(): from flask import current_app current_app.config["WEKO_SEARCH_FIX_ACCESSRIGHTS"] = fix_accessrights From 7c852610529c995df8c4c20c85fd4074e8be0e62 Mon Sep 17 00:00:00 2001 From: "kenji.shiokawa" Date: Wed, 15 Apr 2026 13:50:24 +0900 Subject: [PATCH 21/28] fix invenio_resourcesyncserver query.py --- .../invenio_resourcesyncserver/query.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/invenio-resourcesyncserver/invenio_resourcesyncserver/query.py b/modules/invenio-resourcesyncserver/invenio_resourcesyncserver/query.py index 9f4fdc17b1..934a21e391 100644 --- a/modules/invenio-resourcesyncserver/invenio_resourcesyncserver/query.py +++ b/modules/invenio-resourcesyncserver/invenio_resourcesyncserver/query.py @@ -30,7 +30,6 @@ from weko_search_ui.utils import execute_search_with_pagination from invenio_oaiserver.query import range_query -from datetime import datetime from .config import WEKO_ROOT_INDEX From 150521a5416c36b2b36c45e86f5c7f61b41c3856 Mon Sep 17 00:00:00 2001 From: "kenji.shiokawa" Date: Wed, 15 Apr 2026 18:28:19 +0900 Subject: [PATCH 22/28] fix invenio-records updated --- modules/invenio-records/invenio_records/api.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/modules/invenio-records/invenio_records/api.py b/modules/invenio-records/invenio_records/api.py index 3dd739785b..5f72380674 100644 --- a/modules/invenio-records/invenio_records/api.py +++ b/modules/invenio-records/invenio_records/api.py @@ -385,8 +385,8 @@ def updated(self): original_updated = self.model.updated if not current_app.config.get('WEKO_SEARCH_FIX_ACCESSRIGHTS', False): return original_updated - db.session.expire_all() - metadata = self.model.json if self.model else None + fresh_model = db.session.query(RecordMetadata).populate_existing().get(self.model.id) + metadata = fresh_model.json if fresh_model else None item_type_id = metadata.get("item_type_id") if metadata else None from weko_records.serializers.utils import get_mapping mapping = get_mapping(item_type_id, "jpcoar_mapping") if item_type_id else None @@ -423,6 +423,7 @@ def _get_nested_value(data, path): _get_nested_value(metadata, access_path) if metadata and access_path else None ) + print("[DEBUG] access_rights:", access_rights) accessrole_date = [] for v in metadata.values() if metadata else []: if isinstance(v, dict) and v.get("attribute_type") == "file": @@ -430,18 +431,23 @@ def _get_nested_value(data, path): date_list = file_info.get("date", []) date_value = date_list[0].get("dateValue") if date_list else None accessrole = file_info.get("accessrole") + print(f"[DEBUG] file_info: {{}} date_value: {{}} accessrole: {{}}".format(file_info, date_value, accessrole)) if date_value and accessrole: date_obj = datetime.strptime(date_value, "%Y-%m-%d").date() accessrole_date.append((accessrole, date_obj)) + print("[DEBUG] accessrole_date:", accessrole_date) today = datetime.now().date() + print("[DEBUG] today:", today) from weko_records.utils import check_embargo_rights is_update, change_value = check_embargo_rights( access_rights, today, accessrole_date ) + print("[DEBUG] is_update:", is_update, "change_value:", change_value) if not (is_update and change_value == "open access"): + print("[DEBUG] Not open access or not update, returning original_updated") return original_updated open_dates = [] @@ -449,13 +455,17 @@ def _get_nested_value(data, path): dt = datetime.combine(date_str, time(0, 0, 0)) dt = datetime.combine(dt.date(), time(0, 0, 0)) open_dates.append(dt) + print("[DEBUG] open_dates:", open_dates) latest_open_date = max(open_dates) + print("[DEBUG] latest_open_date:", latest_open_date) if original_updated: - return max(original_updated, latest_open_date) + result = max(original_updated, latest_open_date) + print("[DEBUG] Returning max(original_updated, latest_open_date):", result) + return result else: + print("[DEBUG] Returning latest_open_date:", latest_open_date) return latest_open_date - class RecordRevision(RecordBase): """API for record revisions.""" From 7d3a68d46b1e0fdcb63d5c4d871c49847a57277a Mon Sep 17 00:00:00 2001 From: "kenji.shiokawa" Date: Fri, 17 Apr 2026 10:58:04 +0900 Subject: [PATCH 23/28] fix updated & range_query --- .../invenio_oaiserver/query.py | 21 ++++++++++++++++--- .../invenio-records/invenio_records/api.py | 13 +----------- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/modules/invenio-oaiserver/invenio_oaiserver/query.py b/modules/invenio-oaiserver/invenio_oaiserver/query.py index 336d50bc3f..b5dd52e28b 100644 --- a/modules/invenio-oaiserver/invenio_oaiserver/query.py +++ b/modules/invenio-oaiserver/invenio_oaiserver/query.py @@ -308,6 +308,21 @@ def range_query(_from=None, _until=None): if _from is None and _until is None: return None + if isinstance(_from, datetime): + _from = _from.strftime('%Y-%m-%d') + elif isinstance(_from, str) and len(_from) >= 10: + _from = _from[:10] + else: + _from = None + + if isinstance(_until, datetime): + _until = _until.strftime('%Y-%m-%d') + elif isinstance(_until, str) and len(_until) >= 10: + _until = _until[:10] + else: + _until = None + now = datetime.now().strftime('%Y-%m-%d') + # First should condition must_not_embargoed = Q( @@ -333,7 +348,7 @@ def range_query(_from=None, _until=None): 'bool', must=[ Q('term', **{'content.accessrole.raw': 'open_date'}), - Q('range', **{'content.date.dateValue.raw': {'lte': 'now'}}) + Q('range', **{'content.date.dateValue.raw': {'lte': now}}) ] ) ] @@ -374,7 +389,7 @@ def range_query(_from=None, _until=None): 'bool', must=[ Q('term', **{'content.accessrole.raw': 'open_date'}), - Q('range', **{'content.date.dateValue.raw': {'lte': 'now'}}) + Q('range', **{'content.date.dateValue.raw': {'lte': now}}) ] ) ] @@ -443,4 +458,4 @@ def range_query(_from=None, _until=None): 'bool', should=[should1, should2], minimum_should_match=1 - ) \ No newline at end of file + ) diff --git a/modules/invenio-records/invenio_records/api.py b/modules/invenio-records/invenio_records/api.py index 5f72380674..d73f5fac3a 100644 --- a/modules/invenio-records/invenio_records/api.py +++ b/modules/invenio-records/invenio_records/api.py @@ -385,8 +385,7 @@ def updated(self): original_updated = self.model.updated if not current_app.config.get('WEKO_SEARCH_FIX_ACCESSRIGHTS', False): return original_updated - fresh_model = db.session.query(RecordMetadata).populate_existing().get(self.model.id) - metadata = fresh_model.json if fresh_model else None + metadata = db.session.query(RecordMetadata.json).filter_by(id=self.model.id).scalar() item_type_id = metadata.get("item_type_id") if metadata else None from weko_records.serializers.utils import get_mapping mapping = get_mapping(item_type_id, "jpcoar_mapping") if item_type_id else None @@ -423,7 +422,6 @@ def _get_nested_value(data, path): _get_nested_value(metadata, access_path) if metadata and access_path else None ) - print("[DEBUG] access_rights:", access_rights) accessrole_date = [] for v in metadata.values() if metadata else []: if isinstance(v, dict) and v.get("attribute_type") == "file": @@ -431,23 +429,18 @@ def _get_nested_value(data, path): date_list = file_info.get("date", []) date_value = date_list[0].get("dateValue") if date_list else None accessrole = file_info.get("accessrole") - print(f"[DEBUG] file_info: {{}} date_value: {{}} accessrole: {{}}".format(file_info, date_value, accessrole)) if date_value and accessrole: date_obj = datetime.strptime(date_value, "%Y-%m-%d").date() accessrole_date.append((accessrole, date_obj)) - print("[DEBUG] accessrole_date:", accessrole_date) today = datetime.now().date() - print("[DEBUG] today:", today) from weko_records.utils import check_embargo_rights is_update, change_value = check_embargo_rights( access_rights, today, accessrole_date ) - print("[DEBUG] is_update:", is_update, "change_value:", change_value) if not (is_update and change_value == "open access"): - print("[DEBUG] Not open access or not update, returning original_updated") return original_updated open_dates = [] @@ -455,16 +448,12 @@ def _get_nested_value(data, path): dt = datetime.combine(date_str, time(0, 0, 0)) dt = datetime.combine(dt.date(), time(0, 0, 0)) open_dates.append(dt) - print("[DEBUG] open_dates:", open_dates) latest_open_date = max(open_dates) - print("[DEBUG] latest_open_date:", latest_open_date) if original_updated: result = max(original_updated, latest_open_date) - print("[DEBUG] Returning max(original_updated, latest_open_date):", result) return result else: - print("[DEBUG] Returning latest_open_date:", latest_open_date) return latest_open_date class RecordRevision(RecordBase): """API for record revisions.""" From 07c755a3571f0c1ac3c62921dbfa3bf2c5d3631c Mon Sep 17 00:00:00 2001 From: "kenji.shiokawa" Date: Fri, 17 Apr 2026 17:27:08 +0900 Subject: [PATCH 24/28] fix range_query & _get_index_search_query --- .../invenio_oaiserver/query.py | 28 +++++++++++-------- .../invenio_resourcesyncserver/query.py | 25 +++++++++++++++++ 2 files changed, 42 insertions(+), 11 deletions(-) diff --git a/modules/invenio-oaiserver/invenio_oaiserver/query.py b/modules/invenio-oaiserver/invenio_oaiserver/query.py index b5dd52e28b..95bed046e3 100644 --- a/modules/invenio-oaiserver/invenio_oaiserver/query.py +++ b/modules/invenio-oaiserver/invenio_oaiserver/query.py @@ -299,28 +299,34 @@ def range_query(_from=None, _until=None): """Generate a search query considering update date changes. Args: - _from (str or None): Lower bound of update date. - _until (str or None): Upper bound of update date. + _from (datetime or str or None): + Lower bound of update date. + _until (datetime or str or None): + Upper bound of update date. Returns: - elasticsearch_dsl.query.Q or None: The generated query object, or None if no range is specified. + elasticsearch_dsl.query.Q or None: + The generated query object, or None if no range is specified. """ if _from is None and _until is None: return None if isinstance(_from, datetime): - _from = _from.strftime('%Y-%m-%d') + from_date = _from.strftime('%Y-%m-%d') + _from = _from.isoformat() elif isinstance(_from, str) and len(_from) >= 10: - _from = _from[:10] + from_date = _from[:10] else: - _from = None + from_date = None if isinstance(_until, datetime): - _until = _until.strftime('%Y-%m-%d') + until_date = _until.strftime('%Y-%m-%d') + _until = _until.isoformat() elif isinstance(_until, str) and len(_until) >= 10: - _until = _until[:10] + until_date = _until[:10] else: - _until = None + until_date = None + now = datetime.now().strftime('%Y-%m-%d') # First should condition @@ -407,7 +413,7 @@ def range_query(_from=None, _until=None): 'bool', must=[ Q('term', **{'content.accessrole.raw': 'open_date'}), - Q('range', **{'content.date.dateValue.raw': {'gte': _from}}) + Q('range', **{'content.date.dateValue.raw': {'gte': from_date}}) ] ) ) @@ -428,7 +434,7 @@ def range_query(_from=None, _until=None): 'bool', must=[ Q('term', **{'content.accessrole.raw': 'open_date'}), - Q('range', **{'content.date.dateValue.raw': {'gt': _until}}) + Q('range', **{'content.date.dateValue.raw': {'gt': until_date}}) ] ) ) diff --git a/modules/invenio-resourcesyncserver/invenio_resourcesyncserver/query.py b/modules/invenio-resourcesyncserver/invenio_resourcesyncserver/query.py index 934a21e391..c7ee054d33 100644 --- a/modules/invenio-resourcesyncserver/invenio_resourcesyncserver/query.py +++ b/modules/invenio-resourcesyncserver/invenio_resourcesyncserver/query.py @@ -29,6 +29,7 @@ from weko_schema_ui.models import PublishStatus from weko_search_ui.utils import execute_search_with_pagination from invenio_oaiserver.query import range_query +from datetime import datetime, timedelta from .config import WEKO_ROOT_INDEX @@ -264,6 +265,18 @@ def _get_index_search_query(_date_from: str, _date_until: str) -> dict: } } if current_app.config.get('WEKO_SEARCH_FIX_ACCESSRIGHTS', False): + if len(_date_until) == 19 and 'T' in _date_until: + _date_until = ( + datetime.strptime(_date_until, '%Y-%m-%dT%H:%M:%S') + - timedelta(seconds=1) + ) + _date_until = _date_until.isoformat() + elif len(_date_until) == 10: + _date_until = ( + datetime.strptime(_date_until, '%Y-%m-%d') + - timedelta(seconds=1) + ) + _date_until = _date_until.isoformat() rq = range_query(_date_from, _date_until) if rq is not None: post_filter['bool']['must'].append(rq.to_dict()) @@ -292,6 +305,18 @@ def _get_index_search_query(_date_from: str, _date_until: str) -> dict: } }) if current_app.config.get('WEKO_SEARCH_FIX_ACCESSRIGHTS', False): + if len(_date_until) == 19 and 'T' in _date_until: + _date_until = ( + datetime.strptime(_date_until, '%Y-%m-%dT%H:%M:%S') + - timedelta(seconds=1) + ) + _date_until = _date_until.isoformat() + elif len(_date_until) == 10: + _date_until = ( + datetime.strptime(_date_until, '%Y-%m-%d') + - timedelta(seconds=1) + ) + _date_until = _date_until.isoformat() rq = range_query(_date_from, _date_until) if rq is not None: post_filter['bool']['must'].append(rq.to_dict()) From 10a318949ca4c13cbd2d9f4bb6a9f4166fa9d543 Mon Sep 17 00:00:00 2001 From: "kenji.shiokawa" Date: Mon, 20 Apr 2026 14:30:10 +0900 Subject: [PATCH 25/28] fix unit test: embargo updated --- .../invenio_oaiserver/query.py | 9 +++-- modules/invenio-oaiserver/tests/test_query.py | 39 +++++++++++++++++++ modules/invenio-records/tests/test_api.py | 18 ++++++++- 3 files changed, 60 insertions(+), 6 deletions(-) diff --git a/modules/invenio-oaiserver/invenio_oaiserver/query.py b/modules/invenio-oaiserver/invenio_oaiserver/query.py index 95bed046e3..e7946f1b16 100644 --- a/modules/invenio-oaiserver/invenio_oaiserver/query.py +++ b/modules/invenio-oaiserver/invenio_oaiserver/query.py @@ -308,8 +308,6 @@ def range_query(_from=None, _until=None): elasticsearch_dsl.query.Q or None: The generated query object, or None if no range is specified. """ - if _from is None and _until is None: - return None if isinstance(_from, datetime): from_date = _from.strftime('%Y-%m-%d') @@ -317,7 +315,7 @@ def range_query(_from=None, _until=None): elif isinstance(_from, str) and len(_from) >= 10: from_date = _from[:10] else: - from_date = None + _from = None if isinstance(_until, datetime): until_date = _until.strftime('%Y-%m-%d') @@ -325,7 +323,10 @@ def range_query(_from=None, _until=None): elif isinstance(_until, str) and len(_until) >= 10: until_date = _until[:10] else: - until_date = None + _until = None + + if _from is None and _until is None: + return None now = datetime.now().strftime('%Y-%m-%d') diff --git a/modules/invenio-oaiserver/tests/test_query.py b/modules/invenio-oaiserver/tests/test_query.py index 11c377689a..3f0f7b7271 100644 --- a/modules/invenio-oaiserver/tests/test_query.py +++ b/modules/invenio-oaiserver/tests/test_query.py @@ -307,6 +307,45 @@ def test_get_records_with_set(es_app,db, users): # .tox/c1/bin/pytest --cov=invenio_oaiserver tests/test_query.py::test_range_query -vv -s --cov-branch --cov-report=term --basetemp=/code/modules/invenio-oaiserver/.tox/c1/tmp def test_range_query(): + # Case: _from is datetime + from datetime import datetime + _from = datetime(2026, 1, 1, 12, 0, 0) + result = range_query(_from, None) + assert result is not None + d = result.to_dict() + should2 = d['bool']['should'][1]['bool']['must'] + assert any('gte' in str(x) for x in should2) + + # Case: _until is datetime + _until = datetime(2026, 12, 31, 23, 59, 59) + result = range_query(None, _until) + assert result is not None + d = result.to_dict() + should2 = d['bool']['should'][1]['bool']['must'] + assert any('lte' in str(x) for x in should2) + + + # Case: _from is invalid type (int) + _from = 123456 + result = range_query(_from, None) + assert result is None + + # Case: _until is invalid type (int) + _until = 123456 + result = range_query(None, _until) + assert result is None + + + # Case: _from is short string (invalid) + _from = '2026-01' + result = range_query(_from, None) + assert result is None + + # Case: _until is short string (invalid) + _until = '2026-12' + result = range_query(None, _until) + assert result is None + # Case: both _from and _until are None result = range_query(None, None) assert result is None diff --git a/modules/invenio-records/tests/test_api.py b/modules/invenio-records/tests/test_api.py index 243f4b05ac..071657de17 100644 --- a/modules/invenio-records/tests/test_api.py +++ b/modules/invenio-records/tests/test_api.py @@ -662,15 +662,29 @@ def test_record_updated(app, monkeypatch, access_path, fix_accessrights, updated else: meta = None + class DummyQuery: + def filter_by(self, id): + return self + def scalar(self): + return meta + + class DummySession: + def __init__(self): + self.session = self + def query(self, *args, **kwargs): + return DummyQuery() + monkeypatch.setattr("weko_records.serializers.utils.get_mapping", lambda i, t: {"accessRights.@value": access_path}) with app.app_context(): from flask import current_app current_app.config["WEKO_SEARCH_FIX_ACCESSRIGHTS"] = fix_accessrights record = Record({}) class DummyModel: - def __init__(self, updated, json): + def __init__(self, updated, json, id=1): self.updated = updated self.json = json - record.model = DummyModel(updated=updated, json=meta) + self.id = id + record.model = DummyModel(updated=updated, json=meta, id=1) + monkeypatch.setattr("invenio_records.api.db", DummySession()) result = record.updated assert result == expected From c37145ca15d451bbd1614b9a33e4e5a12f448478 Mon Sep 17 00:00:00 2001 From: "kenji.shiokawa" Date: Tue, 21 Apr 2026 11:38:03 +0900 Subject: [PATCH 26/28] fix test_item_changes_search_factory --- modules/invenio-oaiserver/tests/test_query.py | 2 - .../tests/test_query.py | 61 ++++++++++++++++--- 2 files changed, 54 insertions(+), 9 deletions(-) diff --git a/modules/invenio-oaiserver/tests/test_query.py b/modules/invenio-oaiserver/tests/test_query.py index 3f0f7b7271..93ff024ad3 100644 --- a/modules/invenio-oaiserver/tests/test_query.py +++ b/modules/invenio-oaiserver/tests/test_query.py @@ -324,7 +324,6 @@ def test_range_query(): should2 = d['bool']['should'][1]['bool']['must'] assert any('lte' in str(x) for x in should2) - # Case: _from is invalid type (int) _from = 123456 result = range_query(_from, None) @@ -335,7 +334,6 @@ def test_range_query(): result = range_query(None, _until) assert result is None - # Case: _from is short string (invalid) _from = '2026-01' result = range_query(_from, None) diff --git a/modules/invenio-resourcesyncserver/tests/test_query.py b/modules/invenio-resourcesyncserver/tests/test_query.py index 7ac48ecd58..fd13d62676 100644 --- a/modules/invenio-resourcesyncserver/tests/test_query.py +++ b/modules/invenio-resourcesyncserver/tests/test_query.py @@ -82,14 +82,19 @@ def test_item_changes_search_factory(i18n_app, indices): with patch("weko_index_tree.api.Indexes.get_child_list", return_value=[MagicMock()]): assert item_changes_search_factory(search, index_id="Root Index") -@pytest.mark.parametrize("fix_access, is_root, expect_range, expect_updated", [ - (True, False, True, False), # Non-root, fix_access=True → range_query is used - (False, False, False, True), # Non-root, fix_access=False → _updated range is used - (True, True, True, False), # Root, fix_access=True → range_query is used - (False, True, False, True), # Root, fix_access=False → _updated range is used +@pytest.mark.parametrize("fix_access, is_root, date_until, rq_none, expect_range, expect_updated", [ + # fix_access, is_root, date_until, rq_none, expect_range, expect_updated + (True, False, '2020-12-31T23:59:59', False, True, False), # 19 digits, range_query enabled + (True, False, '2020-12-31', False, True, False), # 10 digits, range_query enabled + (True, False, 'invalid', False, True, False), # invalid value, range_query enabled + (True, False, '2020-12-31T23:59:59', True, False, False), # 19 digits, range_query is None + (False, False, '2020-12-31T23:59:59', False, False, True), # fix_access=False, _updated + (True, True, '2020-12-31T23:59:59', False, True, False), # root, range_query enabled + (False, True, '2020-12-31T23:59:59', False, False, True), # root, fix_access=False + (True, True, '2020-12-31', False, True, False), # root, 10 digits, range_query enabled ]) # .tox/c1/bin/pytest --cov=invenio_resourcesyncserver tests/test_query.py::test_item_changes_search_factory_branch -v -s -vv --cov-branch --cov-report=term --cov-config=tox.ini --basetemp=/code/modules/invenio-resourcesyncserver/.tox/c1/tmp -def test_item_changes_search_factory_branch(monkeypatch, fix_access, is_root, expect_range, expect_updated): +def test_item_changes_search_factory_branch(monkeypatch, fix_access, is_root, date_until, rq_none, expect_range, expect_updated): # Flask Apps and Config app = Flask(__name__) app.config['WEKO_SEARCH_FIX_ACCESSRIGHTS'] = fix_access @@ -116,6 +121,8 @@ def get_child_list(q): called = {'range_query': False} def fake_range_query(*args, **kwargs): called['range_query'] = True + if rq_none: + return None class DummyRQ: def to_dict(self): return {'RANGE_QUERY': True} @@ -130,7 +137,7 @@ def to_dict(self): search = DummySearch() index_id = 0 if is_root else 1 result = query_mod.item_changes_search_factory( - search, index_id=index_id, date_from='2020-01-01', date_until='2020-12-31' + search, index_id=index_id, date_from='2020-01-01', date_until=date_until ) q = result.query if expect_range: @@ -140,9 +147,49 @@ def to_dict(self): for m in q['post_filter']['bool']['must'] ) assert called['range_query'] + else: + # If range_query is None, RANGE_QUERY should not be included in must + assert not any( + (isinstance(m, dict) and 'RANGE_QUERY' in m) or + (hasattr(m, 'to_dict') and 'RANGE_QUERY' in m.to_dict()) + for m in q['post_filter']['bool']['must'] + ) if expect_updated: assert any( (isinstance(m, dict) and '_updated' in m.get('range', {})) or (hasattr(m, 'to_dict') and '_updated' in m.to_dict().get('range', {})) for m in q['post_filter']['bool']['must'] ) + +def test_item_changes_search_factory_except_baseexception(monkeypatch): + from invenio_resourcesyncserver import query as query_mod + from flask import Flask + class DummySearch: + def update_from_dict(self, q): + pass + # Mock json.dumps to raise TypeError + monkeypatch.setattr(query_mod.json, "dumps", lambda *a, **k: (_ for _ in ()).throw(TypeError("dummy"))) + # Mock Indexes.get_list_path_publish + monkeypatch.setattr(query_mod.Indexes, "get_list_path_publish", lambda idx: ["dummy"]) + app = Flask(__name__) + app.config["WEKO_ROOT_INDEX"] = 0 + with app.app_context(): + search = DummySearch() + # Confirm that the exception is suppressed and the function returns normally + result = query_mod.item_changes_search_factory(search, index_id=1) + assert result is not None + +def test_item_changes_search_factory_except_syntaxerror(monkeypatch): + from invenio_resourcesyncserver import query as query_mod + from flask import Flask + class DummySearch: + def update_from_dict(self, q): + raise SyntaxError("dummy syntax error") + # Mock Indexes.get_list_path_publish + monkeypatch.setattr(query_mod.Indexes, "get_list_path_publish", lambda idx: ["dummy"]) + app = Flask(__name__) + app.config["WEKO_ROOT_INDEX"] = 0 + with app.app_context(): + search = DummySearch() + with pytest.raises(query_mod.InvalidQueryRESTError): + query_mod.item_changes_search_factory(search, index_id=1) From 43ab98ad8c8f06a36c6a0c2506bb6121406ba78c Mon Sep 17 00:00:00 2001 From: "kenji.shiokawa" Date: Tue, 21 Apr 2026 15:32:08 +0900 Subject: [PATCH 27/28] fix test_item_changes_search_factory --- modules/invenio-resourcesyncserver/tests/test_query.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/invenio-resourcesyncserver/tests/test_query.py b/modules/invenio-resourcesyncserver/tests/test_query.py index fd13d62676..a9cfd0dbd8 100644 --- a/modules/invenio-resourcesyncserver/tests/test_query.py +++ b/modules/invenio-resourcesyncserver/tests/test_query.py @@ -91,7 +91,8 @@ def test_item_changes_search_factory(i18n_app, indices): (False, False, '2020-12-31T23:59:59', False, False, True), # fix_access=False, _updated (True, True, '2020-12-31T23:59:59', False, True, False), # root, range_query enabled (False, True, '2020-12-31T23:59:59', False, False, True), # root, fix_access=False - (True, True, '2020-12-31', False, True, False), # root, 10 digits, range_query enabled + (True, True, '2020-12-31', False, True, False), # root, 10 digits, range_query enabled + (True, True, 'invalid', True, False, False), # root, invalid value, range_query is None ]) # .tox/c1/bin/pytest --cov=invenio_resourcesyncserver tests/test_query.py::test_item_changes_search_factory_branch -v -s -vv --cov-branch --cov-report=term --cov-config=tox.ini --basetemp=/code/modules/invenio-resourcesyncserver/.tox/c1/tmp def test_item_changes_search_factory_branch(monkeypatch, fix_access, is_root, date_until, rq_none, expect_range, expect_updated): From ca2909db2be23cc7dd4475ee3003912468ebfe39 Mon Sep 17 00:00:00 2001 From: "kenji.shiokawa" Date: Tue, 21 Apr 2026 16:34:34 +0900 Subject: [PATCH 28/28] fix invenio-oaiserver test_query.py --- modules/invenio-oaiserver/tests/test_query.py | 35 +++++++++++-------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/modules/invenio-oaiserver/tests/test_query.py b/modules/invenio-oaiserver/tests/test_query.py index 93ff024ad3..e62d2283db 100644 --- a/modules/invenio-oaiserver/tests/test_query.py +++ b/modules/invenio-oaiserver/tests/test_query.py @@ -377,18 +377,19 @@ def test_range_query(): assert any('gte' in str(x) for x in should2) assert any('lte' in str(x) for x in should2) -@pytest.mark.parametrize("fix_access, from_, until, expect_range, expect_rq", [ - (False, None, None, False, False), - (False, "2026-01-01", None, True, False), - (False, None, "2026-12-31", True, False), - (False, "2026-01-01", "2026-12-31", True, False), - (True, None, None, False, False), - (True, "2026-01-01", None, True, True), - (True, None, "2026-12-31", True, True), - (True, "2026-01-01", "2026-12-31", True, True), +@pytest.mark.parametrize("fix_access, from_, until, expect_range, expect_rq, rangequery_none", [ + (False, None, None, False, False, False), + (False, "2026-01-01", None, True, False, False), + (False, None, "2026-12-31", True, False, False), + (False, "2026-01-01", "2026-12-31", True, False, False), + (True, None, None, False, False, False), + (True, "2026-01-01", None, True, True, False), + (True, None, "2026-12-31", True, True, False), + (True, "2026-01-01", "2026-12-31", True, True, False), + (True, "2026-01-01", "2026-12-31", False, True, True), ]) # .tox/c1/bin/pytest --cov=invenio_oaiserver tests/test_query.py::test_get_records_range_branch -v -s -vv --cov-branch --cov-report=term --cov-config=tox.ini --basetemp=/code/modules/invenio-oaiserver/.tox/c1/tmp -def test_get_records_range_branch(es_app, db, monkeypatch, fix_access, from_, until, expect_range, expect_rq): +def test_get_records_range_branch(es_app, db, monkeypatch, fix_access, from_, until, expect_range, expect_rq, rangequery_none): es_app.config['WEKO_SEARCH_FIX_ACCESSRIGHTS'] = fix_access index = Index( @@ -439,10 +440,16 @@ def filter_spy(self, *args, **kwargs): monkeypatch.setattr(query_mod.OAIServerSearch, "filter", filter_spy) orig_rq = query_mod.range_query - def rq_spy(_from, _until): - called["rq"] = True - return orig_rq(_from, _until) - monkeypatch.setattr(query_mod, "range_query", rq_spy) + if rangequery_none: + def rq_spy(_from, _until): + called["rq"] = True + return None + monkeypatch.setattr(query_mod, "range_query", rq_spy) + else: + def rq_spy(_from, _until): + called["rq"] = True + return orig_rq(_from, _until) + monkeypatch.setattr(query_mod, "range_query", rq_spy) with es_app.app_context(): kwargs = {"set": "30"}