[Git][debian-gis-team/asf-search][upstream] New upstream version 6.6.2

Antonio Valentino (@antonio.valentino) gitlab at salsa.debian.org
Sat Aug 12 16:05:58 BST 2023



Antonio Valentino pushed to branch upstream at Debian GIS Project / asf-search


Commits:
91f88ca0 by Antonio Valentino at 2023-08-12T15:01:28+00:00
New upstream version 6.6.2
- - - - -


7 changed files:

- CHANGELOG.md
- asf_search/exceptions.py
- asf_search/search/baseline_search.py
- asf_search/search/search_count.py
- asf_search/search/search_generator.py
- tests/Search/test_search.py
- tests/yml_tests/test_search_generator.yml


Changes:

=====================================
CHANGELOG.md
=====================================
@@ -25,6 +25,15 @@ and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 -
 
 -->
+------
+## [v6.6.2](https://github.com/asfadmin/Discovery-asf_search/compare/v6.6.1...v6.6.2)
+### Added
+- Adds new `CMRIncompleteError` exception, raised by search methods when CMR returns an incomplete page
+### Fixed
+- Fixes bug in `search_generator()` causing results to sometimes wrongly be marked as incomplete
+### Changed
+- `stack_from_id()` now raises if results are incomplete, before checking if reference was found
+
 ------
 ## [v6.6.1](https://github.com/asfadmin/Discovery-asf_search/compare/v6.6.0...v6.6.1)
 ### Added


=====================================
asf_search/exceptions.py
=====================================
@@ -37,3 +37,6 @@ class CMRError(Exception):
 
 class CMRConceptIDError(CMRError):
     """Raise when CMR encounters a concept-id error"""
+
+class CMRIncompleteError(CMRError):
+    """Raise when CMR returns an incomplete page of results"""


=====================================
asf_search/search/baseline_search.py
=====================================
@@ -45,6 +45,7 @@ def stack_from_product(
 
     return stack
 
+
 def stack_from_id(
         reference_id: str,
         opts: ASFSearchOptions = None
@@ -60,8 +61,11 @@ def stack_from_id(
 
     opts = (ASFSearchOptions() if opts is None else copy(opts))
 
-    reference_results = product_search(product_list=reference_id, opts=opts)
 
+    reference_results = product_search(product_list=reference_id, opts=opts)
+    
+    reference_results.raise_if_incomplete()
+    
     if len(reference_results) <= 0:
         raise ASFSearchError(f'Reference product not found: {reference_id}')
     reference = reference_results[0]
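
A usage sketch for the stricter stacking behaviour (not part of the diff; it assumes `stack_from_id` and `ASFSearchError` are re-exported from the top-level `asf_search` package, and that `raise_if_incomplete()` raises an `ASFSearchError`, matching the not-found path shown above; the reference ID is a placeholder):

    import asf_search as asf

    reference_id = 'REFERENCE_GRANULE_ID'  # placeholder, not a real product ID

    try:
        stack = asf.stack_from_id(reference_id)
    except asf.ASFSearchError as exc:
        # raised when the reference lookup returned incomplete results
        # (new in 6.6.2) or when the reference product is not found
        print(f'could not build stack: {exc}')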


=====================================
asf_search/search/search_count.py
=====================================
@@ -61,6 +61,6 @@ def search_count(
         idx = translated_opts.index(('page_size', INTERNAL.CMR_PAGE_SIZE))
         translated_opts[idx] = ('page_size', 0)
         
-        response = get_page(session=opts.session, url=url, translated_opts=translated_opts, search_opts=query)
+        response = get_page(session=opts.session, url=url, translated_opts=translated_opts)
         count += response.json()['hits']
     return count


=====================================
asf_search/search/search_generator.py
=====================================
@@ -3,7 +3,7 @@ from typing import Generator, Union, Iterable, Tuple
 from copy import copy
 from requests.exceptions import HTTPError
 from requests import ReadTimeout, Response
-from tenacity import retry, retry_if_exception_type, stop_after_delay, wait_exponential
+from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential, wait_fixed
 import datetime
 import dateparser
 import warnings
@@ -15,7 +15,7 @@ from asf_search.ASFSearchOptions import ASFSearchOptions
 from asf_search.CMR import build_subqueries, translate_opts
 from asf_search.ASFSession import ASFSession
 from asf_search.ASFProduct import ASFProduct
-from asf_search.exceptions import ASFSearch4xxError, ASFSearch5xxError, ASFSearchError
+from asf_search.exceptions import ASFSearch4xxError, ASFSearch5xxError, ASFSearchError, CMRIncompleteError
 from asf_search.constants import INTERNAL
 from asf_search.WKT.validate_wkt import validate_wkt
 from asf_search.search.error_reporting import report_search_error
@@ -78,78 +78,75 @@ def search_generator(
     preprocess_opts(opts)
 
     url = '/'.join(s.strip('/') for s in [f'https://{opts.host}', f'{INTERNAL.CMR_GRANULE_PATH}'])
-    count = 0
+    total = 0
     
-    for query in build_subqueries(opts):
+    queries = build_subqueries(opts)
+    for query in queries:
         translated_opts = translate_opts(query)
-        try:
-            response = get_page(session=opts.session, url=url, translated_opts=translated_opts, search_opts=query)
-        except ASFSearchError as e:
-            message = str(e)
-            logging.error(message)
-            report_search_error(query, message)
-            opts.session.headers.pop('CMR-Search-After', None)
-            return
-
-        hits = [ASFProduct(f, session=query.session) for f in response.json()['items']]
-
-        if 'CMR-Search-After' in response.headers:
-            opts.session.headers.update({'CMR-Search-After': response.headers['CMR-Search-After']})
-
-        if maxResults != None:
-            last_page = ASFSearchResults(hits[:min(maxResults - count, len(hits))], opts=opts)
-            count += len(last_page)
-            
-            if count == maxResults:
-                last_page.searchComplete = True
-                yield last_page
-                return
-            else:
-                yield last_page
-        else:
-            count += len(hits)
-            yield ASFSearchResults(hits, opts=opts)
-
-        while('CMR-Search-After' in response.headers):
-            opts.session.headers.update({'CMR-Search-After': response.headers['CMR-Search-After']})
-
+        cmr_search_after_header = ""
+        subquery_count = 0
+        
+        while(cmr_search_after_header is not None):
             try:
-                response = get_page(session=opts.session, url=url, translated_opts=translated_opts, search_opts=query)
-            except ASFSearchError as e:
+                items, subquery_max_results, cmr_search_after_header = query_cmr(opts.session, url, translated_opts, subquery_count)
+            except (ASFSearchError, CMRIncompleteError) as e:
                 message = str(e)
                 logging.error(message)
                 report_search_error(query, message)
                 opts.session.headers.pop('CMR-Search-After', None)
                 return
+            
+            opts.session.headers.update({'CMR-Search-After': cmr_search_after_header})
+            last_page = process_page(items, maxResults, subquery_max_results, total, subquery_count, opts)
+            subquery_count += len(last_page)
+            total += len(last_page)
+            last_page.searchComplete = subquery_count == subquery_max_results or total == maxResults
+            yield last_page
+            
+            if last_page.searchComplete:
+                if total == maxResults: # the user has as many results as they wanted
+                    opts.session.headers.pop('CMR-Search-After', None)
+                    return
+                else: # or we've gotten all possible results for this subquery
+                    cmr_search_after_header = None
+        
+        opts.session.headers.pop('CMR-Search-After', None)
 
-            hits = [ASFProduct(f, session=query.session) for f in response.json()['items']]
-
-            if len(hits):
-                if maxResults != None:
-                    last_page = ASFSearchResults(hits[:min(maxResults - count, len(hits))], opts=opts)
-                    count += len(last_page)
-                    # results.extend(hits[:min(maxResults - len(results), len(hits))])
-                    if count == maxResults:
-                        last_page.searchComplete = True
-                        yield last_page
-                        return
-                    else:
-                        yield last_page
-                else:
-                    count += len(hits)
-                    yield ASFSearchResults(hits, opts=opts)
 
-        opts.session.headers.pop('CMR-Search-After', None)
+ at retry(reraise=True,
+       retry=retry_if_exception_type(CMRIncompleteError),
+       wait=wait_fixed(2),
+       stop=stop_after_attempt(3),
+    )
+def query_cmr(session: ASFSession, url: str, translated_opts: dict, sub_query_count: int):
+    response = get_page(session=session, url=url, translated_opts=translated_opts)
+
+    items = [ASFProduct(f, session=session) for f in response.json()['items']]
+    hits: int = response.json()['hits'] # total count of products given search opts
+
+    # sometimes CMR returns results with the wrong page size
+    if len(items) != INTERNAL.CMR_PAGE_SIZE and len(items) + sub_query_count < hits:
+        raise CMRIncompleteError(f"CMR returned page of incomplete results. Expected {min(INTERNAL.CMR_PAGE_SIZE, hits - sub_query_count)} results, got {len(items)}")
+
+    return items, hits, response.headers.get('CMR-Search-After', None)
+    
+
+def process_page(items: list[ASFProduct], max_results: int, subquery_max_results: int, total: int, subquery_count: int, opts: ASFSearchOptions):
+    if max_results is None:
+        last_page = ASFSearchResults(items[:min(subquery_max_results - subquery_count, len(items))], opts=opts)
+    else:
+        last_page = ASFSearchResults(items[:min(max_results - total, len(items))], opts=opts)
+    return last_page
 
 
 @retry(reraise=True,
-       retry=retry_if_exception_type((TimeoutError, ASFSearch5xxError)),
-       wait=wait_exponential(multiplier=1, min=4, max=10),
-       stop=stop_after_delay(340),
+       retry=retry_if_exception_type(ASFSearch5xxError),
+       wait=wait_exponential(multiplier=1, min=3, max=10),  # Wait 2^x * 1 starting with 3 seconds, max 10 seconds between retries
+       stop=stop_after_attempt(3),
     )
-def get_page(session: ASFSession, url: str, translated_opts: list, search_opts: ASFSearchOptions) -> Response:
+def get_page(session: ASFSession, url: str, translated_opts: list) -> Response:
     try:
-        response = session.post(url=url, data=translated_opts, timeout=170)
+        response = session.post(url=url, data=translated_opts, timeout=30)
         response.raise_for_status()
     except HTTPError as exc:
         error_message = f'HTTP {response.status_code}: {response.json()["errors"]}'
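
The retry behaviour added to `query_cmr()` above can be shown in isolation. A standalone sketch of the same tenacity pattern, using a stand-in exception rather than the real `CMRIncompleteError`:

    from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed

    class IncompletePage(Exception):
        """Stand-in for asf_search.exceptions.CMRIncompleteError."""

    attempts = 0

    @retry(reraise=True,
           retry=retry_if_exception_type(IncompletePage),
           wait=wait_fixed(2),           # wait 2 seconds between attempts
           stop=stop_after_attempt(3))   # give up after the 3rd attempt
    def fetch_page():
        global attempts
        attempts += 1
        if attempts < 3:
            raise IncompletePage("short page")
        return "complete page"

    print(fetch_page())  # succeeds on the third attempt

With reraise=True, the original exception (not tenacity's RetryError) is what propagates if all three attempts return an incomplete page.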


=====================================
tests/Search/test_search.py
=====================================
@@ -30,7 +30,7 @@ def run_test_ASFSearchResults(search_resp):
 
 def run_test_search(search_parameters, answer):
     with requests_mock.Mocker() as m:
-        m.post(f"https://{INTERNAL.CMR_HOST}{INTERNAL.CMR_GRANULE_PATH}", json={'items': answer})
+        m.post(f"https://{INTERNAL.CMR_HOST}{INTERNAL.CMR_GRANULE_PATH}", json={'items': answer, 'hits': len(answer)})
         response = search(**search_parameters)
 
         if search_parameters.get("maxResults", False):


=====================================
tests/yml_tests/test_search_generator.yml
=====================================
@@ -25,15 +25,3 @@ tests:
         maxResults: 250,
         platform: 'UAVSAR'
       }
-
-- test-ASFSearch-search-valid S1 ALOS:
-    parameters: [
-      {
-        maxResults: 750,
-        platform: 'Sentinel-1'
-      },
-      {
-        maxResults: 750,
-        platform: 'ALOS'
-      },
-    ]



View it on GitLab: https://salsa.debian.org/debian-gis-team/asf-search/-/commit/91f88ca0302e647a158a30559425b1b3b94368c1
