[Git][debian-gis-team/asf-search][upstream] New upstream version 6.6.2
Antonio Valentino (@antonio.valentino)
gitlab at salsa.debian.org
Sat Aug 12 16:05:58 BST 2023
Antonio Valentino pushed to branch upstream at Debian GIS Project / asf-search
Commits:
91f88ca0 by Antonio Valentino at 2023-08-12T15:01:28+00:00
New upstream version 6.6.2
- - - - -
7 changed files:
- CHANGELOG.md
- asf_search/exceptions.py
- asf_search/search/baseline_search.py
- asf_search/search/search_count.py
- asf_search/search/search_generator.py
- tests/Search/test_search.py
- tests/yml_tests/test_search_generator.yml
Changes:
=====================================
CHANGELOG.md
=====================================
@@ -25,6 +25,15 @@ and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
-
-->
+------
+## [v6.6.2](https://github.com/asfadmin/Discovery-asf_search/compare/v6.6.1...v6.6.2)
+### Added
+- Adds new `CMRIncompleteError` exception, raised by search methods when CMR returns an incomplete page
+### Fixed
+- Fixes bug in `search_generator()` causing results to sometimes wrongly be marked as incomplete
+### Changed
+- `stack_from_id()` now raises if results are incomplete, before checking if reference was found
+
------
## [v6.6.1](https://github.com/asfadmin/Discovery-asf_search/compare/v6.6.0...v6.6.1)
### Added
=====================================
asf_search/exceptions.py
=====================================
@@ -37,3 +37,6 @@ class CMRError(Exception):
class CMRConceptIDError(CMRError):
"""Raise when CMR encounters a concept-id error"""
+
+class CMRIncompleteError(CMRError):
+ """Raise when CMR returns an incomplete page of results"""
=====================================
asf_search/search/baseline_search.py
=====================================
@@ -45,6 +45,7 @@ def stack_from_product(
return stack
+
def stack_from_id(
reference_id: str,
opts: ASFSearchOptions = None
@@ -60,8 +61,11 @@ def stack_from_id(
opts = (ASFSearchOptions() if opts is None else copy(opts))
- reference_results = product_search(product_list=reference_id, opts=opts)
+ reference_results = product_search(product_list=reference_id, opts=opts)
+
+ reference_results.raise_if_incomplete()
+
if len(reference_results) <= 0:
raise ASFSearchError(f'Reference product not found: {reference_id}')
reference = reference_results[0]
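For callers of stack_from_id(), the added raise_if_incomplete() call means an incomplete reference lookup now surfaces as an exception before the "Reference product not found" check can misfire. A hedged usage sketch; the reference ID is a placeholder, and the exception raised by raise_if_incomplete() is assumed to derive from ASFSearchError:

import asf_search as asf

try:
    # Hypothetical reference ID; stack_from_id() now raises if the
    # underlying product_search() results were incomplete.
    stack = asf.stack_from_id('EXAMPLE_REFERENCE_PRODUCT_ID')
except asf.ASFSearchError as err:
    print(f'stacking failed: {err}')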
=====================================
asf_search/search/search_count.py
=====================================
@@ -61,6 +61,6 @@ def search_count(
idx = translated_opts.index(('page_size', INTERNAL.CMR_PAGE_SIZE))
translated_opts[idx] = ('page_size', 0)
- response = get_page(session=opts.session, url=url, translated_opts=translated_opts, search_opts=query)
+ response = get_page(session=opts.session, url=url, translated_opts=translated_opts)
count += response.json()['hits']
return count
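search_count() now calls get_page() without the dropped search_opts argument; the count itself still comes from forcing page_size to 0 and reading the 'hits' field of CMR's response. A rough standalone sketch of that idea using requests directly; the endpoint and parameters are illustrative assumptions, while the real code builds them via translate_opts():

import requests

# Ask CMR for zero granules but read the reported total ('hits').
params = {'provider': 'ASF', 'platform[]': 'UAVSAR', 'page_size': 0}
response = requests.post('https://cmr.earthdata.nasa.gov/search/granules.umm_json', data=params)
response.raise_for_status()
print('total hits:', response.json()['hits'])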
=====================================
asf_search/search/search_generator.py
=====================================
@@ -3,7 +3,7 @@ from typing import Generator, Union, Iterable, Tuple
from copy import copy
from requests.exceptions import HTTPError
from requests import ReadTimeout, Response
-from tenacity import retry, retry_if_exception_type, stop_after_delay, wait_exponential
+from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential, wait_fixed
import datetime
import dateparser
import warnings
@@ -15,7 +15,7 @@ from asf_search.ASFSearchOptions import ASFSearchOptions
from asf_search.CMR import build_subqueries, translate_opts
from asf_search.ASFSession import ASFSession
from asf_search.ASFProduct import ASFProduct
-from asf_search.exceptions import ASFSearch4xxError, ASFSearch5xxError, ASFSearchError
+from asf_search.exceptions import ASFSearch4xxError, ASFSearch5xxError, ASFSearchError, CMRIncompleteError
from asf_search.constants import INTERNAL
from asf_search.WKT.validate_wkt import validate_wkt
from asf_search.search.error_reporting import report_search_error
@@ -78,78 +78,75 @@ def search_generator(
preprocess_opts(opts)
url = '/'.join(s.strip('/') for s in [f'https://{opts.host}', f'{INTERNAL.CMR_GRANULE_PATH}'])
- count = 0
+ total = 0
- for query in build_subqueries(opts):
+ queries = build_subqueries(opts)
+ for query in queries:
translated_opts = translate_opts(query)
- try:
- response = get_page(session=opts.session, url=url, translated_opts=translated_opts, search_opts=query)
- except ASFSearchError as e:
- message = str(e)
- logging.error(message)
- report_search_error(query, message)
- opts.session.headers.pop('CMR-Search-After', None)
- return
-
- hits = [ASFProduct(f, session=query.session) for f in response.json()['items']]
-
- if 'CMR-Search-After' in response.headers:
- opts.session.headers.update({'CMR-Search-After': response.headers['CMR-Search-After']})
-
- if maxResults != None:
- last_page = ASFSearchResults(hits[:min(maxResults - count, len(hits))], opts=opts)
- count += len(last_page)
-
- if count == maxResults:
- last_page.searchComplete = True
- yield last_page
- return
- else:
- yield last_page
- else:
- count += len(hits)
- yield ASFSearchResults(hits, opts=opts)
-
- while('CMR-Search-After' in response.headers):
- opts.session.headers.update({'CMR-Search-After': response.headers['CMR-Search-After']})
-
+ cmr_search_after_header = ""
+ subquery_count = 0
+
+ while(cmr_search_after_header is not None):
try:
- response = get_page(session=opts.session, url=url, translated_opts=translated_opts, search_opts=query)
- except ASFSearchError as e:
+ items, subquery_max_results, cmr_search_after_header = query_cmr(opts.session, url, translated_opts, subquery_count)
+ except (ASFSearchError, CMRIncompleteError) as e:
message = str(e)
logging.error(message)
report_search_error(query, message)
opts.session.headers.pop('CMR-Search-After', None)
return
+
+ opts.session.headers.update({'CMR-Search-After': cmr_search_after_header})
+ last_page = process_page(items, maxResults, subquery_max_results, total, subquery_count, opts)
+ subquery_count += len(last_page)
+ total += len(last_page)
+ last_page.searchComplete = subquery_count == subquery_max_results or total == maxResults
+ yield last_page
+
+ if last_page.searchComplete:
+ if total == maxResults: # the user has as many results as they wanted
+ opts.session.headers.pop('CMR-Search-After', None)
+ return
+ else: # or we've gotten all possible results for this subquery
+ cmr_search_after_header = None
+
+ opts.session.headers.pop('CMR-Search-After', None)
- hits = [ASFProduct(f, session=query.session) for f in response.json()['items']]
-
- if len(hits):
- if maxResults != None:
- last_page = ASFSearchResults(hits[:min(maxResults - count, len(hits))], opts=opts)
- count += len(last_page)
- # results.extend(hits[:min(maxResults - len(results), len(hits))])
- if count == maxResults:
- last_page.searchComplete = True
- yield last_page
- return
- else:
- yield last_page
- else:
- count += len(hits)
- yield ASFSearchResults(hits, opts=opts)
- opts.session.headers.pop('CMR-Search-After', None)
+@retry(reraise=True,
+ retry=retry_if_exception_type(CMRIncompleteError),
+ wait=wait_fixed(2),
+ stop=stop_after_attempt(3),
+ )
+def query_cmr(session: ASFSession, url: str, translated_opts: dict, sub_query_count: int):
+ response = get_page(session=session, url=url, translated_opts=translated_opts)
+
+ items = [ASFProduct(f, session=session) for f in response.json()['items']]
+ hits: int = response.json()['hits'] # total count of products given search opts
+
+ # sometimes CMR returns results with the wrong page size
+ if len(items) != INTERNAL.CMR_PAGE_SIZE and len(items) + sub_query_count < hits:
+ raise CMRIncompleteError(f"CMR returned page of incomplete results. Expected {min(INTERNAL.CMR_PAGE_SIZE, hits - sub_query_count)} results, got {len(items)}")
+
+ return items, hits, response.headers.get('CMR-Search-After', None)
+
+
+def process_page(items: list[ASFProduct], max_results: int, subquery_max_results: int, total: int, subquery_count: int, opts: ASFSearchOptions):
+ if max_results is None:
+ last_page = ASFSearchResults(items[:min(subquery_max_results - subquery_count, len(items))], opts=opts)
+ else:
+ last_page = ASFSearchResults(items[:min(max_results - total, len(items))], opts=opts)
+ return last_page
@retry(reraise=True,
- retry=retry_if_exception_type((TimeoutError, ASFSearch5xxError)),
- wait=wait_exponential(multiplier=1, min=4, max=10),
- stop=stop_after_delay(340),
+ retry=retry_if_exception_type(ASFSearch5xxError),
+ wait=wait_exponential(multiplier=1, min=3, max=10), # Wait 2^x * 1 starting with 3 seconds, max 10 seconds between retries
+ stop=stop_after_attempt(3),
)
-def get_page(session: ASFSession, url: str, translated_opts: list, search_opts: ASFSearchOptions) -> Response:
+def get_page(session: ASFSession, url: str, translated_opts: list) -> Response:
try:
- response = session.post(url=url, data=translated_opts, timeout=170)
+ response = session.post(url=url, data=translated_opts, timeout=30)
response.raise_for_status()
except HTTPError as exc:
error_message = f'HTTP {response.status_code}: {response.json()["errors"]}'
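The incomplete-page handling leans on tenacity's retry decorator: query_cmr() raises CMRIncompleteError on a short page and is retried up to three times with a fixed 2-second wait before the error is re-raised to search_generator(). A standalone sketch of that pattern; the fetch_page() function and its failure simulation are hypothetical:

from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed

class CMRIncompleteError(Exception):
    """Stand-in for asf_search.exceptions.CMRIncompleteError."""

attempts = {'count': 0}

@retry(reraise=True,
       retry=retry_if_exception_type(CMRIncompleteError),
       wait=wait_fixed(2),
       stop=stop_after_attempt(3))
def fetch_page():
    # Simulate CMR returning short pages twice before a complete one arrives.
    attempts['count'] += 1
    if attempts['count'] < 3:
        raise CMRIncompleteError(f"incomplete page on attempt {attempts['count']}")
    return ['granule-1', 'granule-2']

print(fetch_page())  # retried twice, succeeds on the third attempt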
=====================================
tests/Search/test_search.py
=====================================
@@ -30,7 +30,7 @@ def run_test_ASFSearchResults(search_resp):
def run_test_search(search_parameters, answer):
with requests_mock.Mocker() as m:
- m.post(f"https://{INTERNAL.CMR_HOST}{INTERNAL.CMR_GRANULE_PATH}", json={'items': answer})
+ m.post(f"https://{INTERNAL.CMR_HOST}{INTERNAL.CMR_GRANULE_PATH}", json={'items': answer, 'hits': len(answer)})
response = search(**search_parameters)
if search_parameters.get("maxResults", False):
=====================================
tests/yml_tests/test_search_generator.yml
=====================================
@@ -25,15 +25,3 @@ tests:
maxResults: 250,
platform: 'UAVSAR'
}
-
-- test-ASFSearch-search-valid S1 ALOS:
- parameters: [
- {
- maxResults: 750,
- platform: 'Sentinel-1'
- },
- {
- maxResults: 750,
- platform: 'ALOS'
- },
- ]
View it on GitLab: https://salsa.debian.org/debian-gis-team/asf-search/-/commit/91f88ca0302e647a158a30559425b1b3b94368c1