[med-svn] [Git][med-team/sourmash][master] skip branchwater, refresh sourmash.h

Michael R. Crusoe (@crusoe) gitlab at salsa.debian.org
Mon Oct 28 17:01:43 GMT 2024



Michael R. Crusoe pushed to branch master at Debian Med / sourmash


Commits:
1e81fd74 by Michael R. Crusoe at 2024-10-28T18:01:22+01:00
skip branchwater, refresh sourmash.h

- - - - -


3 changed files:

- debian/patches/series
- − debian/patches/skip-RevIndex-test
- debian/patches/skip-branchwater-feature


Changes:

=====================================
debian/patches/series
=====================================
@@ -1,3 +1,2 @@
 skip-branchwater-feature
 soften-deps
-skip-RevIndex-test


=====================================
debian/patches/skip-RevIndex-test deleted
=====================================
@@ -1,150 +0,0 @@
-Author: Michael R. Crusoe <crusoe at debian.org>
-Description: Temporarily skip the RevIndex tests while we debug that
-Forwarded: not-needed
---- sourmash.orig/tests/test_index.py
-+++ sourmash/tests/test_index.py
-@@ -20,7 +20,6 @@
-     StandaloneManifestIndex,
- )
- from sourmash.signature import load_one_signature_from_json, save_signatures_to_json
--from sourmash.index.revindex import RevIndex
- from sourmash.sbt import SBT, GraphFactory
- from sourmash import sourmash_args
- from sourmash.search import JaccardSearch, SearchType
-@@ -1812,108 +1811,6 @@
-         assert ss_tup == ss_lazy_tup
- 
- 
--def test_revindex_index_search():
--    # confirm that RevIndex works
--    sig2 = utils.get_test_data("2.fa.sig")
--    sig47 = utils.get_test_data("47.fa.sig")
--    sig63 = utils.get_test_data("63.fa.sig")
--
--    ss2 = load_one_signature_from_json(sig2, ksize=31)
--    ss47 = load_one_signature_from_json(sig47)
--    ss63 = load_one_signature_from_json(sig63)
--
--    lidx = RevIndex(template=ss2.minhash)
--    lidx.insert(ss2)
--    lidx.insert(ss47)
--    lidx.insert(ss63)
--
--    # now, search for sig2
--    sr = lidx.search(ss2, threshold=1.0)
--    print([s[1].name for s in sr])
--    assert len(sr) == 1
--    assert sr[0][1] == ss2
--
--    # search for sig47 with lower threshold; search order not guaranteed.
--    sr = lidx.search(ss47, threshold=0.1)
--    print([s[1].name for s in sr])
--    assert len(sr) == 2
--    sr.sort(key=lambda x: -x[0])
--    assert sr[0][1] == ss47
--    assert sr[1][1] == ss63
--
--    # search for sig63 with lower threshold; search order not guaranteed.
--    sr = lidx.search(ss63, threshold=0.1)
--    print([s[1].name for s in sr])
--    assert len(sr) == 2
--    sr.sort(key=lambda x: -x[0])
--    assert sr[0][1] == ss63
--    assert sr[1][1] == ss47
--
--    # search for sig63 with high threshold => 1 match
--    sr = lidx.search(ss63, threshold=0.8)
--    print([s[1].name for s in sr])
--    assert len(sr) == 1
--    sr.sort(key=lambda x: -x[0])
--    assert sr[0][1] == ss63
--
--
--def test_revindex_gather():
--    # check that RevIndex.best_containment works.
--    sig2 = utils.get_test_data("2.fa.sig")
--    sig47 = utils.get_test_data("47.fa.sig")
--    sig63 = utils.get_test_data("63.fa.sig")
--
--    ss2 = load_one_signature_from_json(sig2, ksize=31)
--    ss47 = load_one_signature_from_json(sig47)
--    ss63 = load_one_signature_from_json(sig63)
--
--    lidx = RevIndex(template=ss2.minhash)
--    lidx.insert(ss2)
--    lidx.insert(ss47)
--    lidx.insert(ss63)
--
--    match = lidx.best_containment(ss2)
--    assert match
--    assert match.score == 1.0
--    assert match.signature == ss2
--
--    match = lidx.best_containment(ss47)
--    assert match
--    assert match.score == 1.0
--    assert match.signature == ss47
--
--
--def test_revindex_gather_ignore():
--    # check that RevIndex gather ignores things properly.
--    sig2 = utils.get_test_data("2.fa.sig")
--    sig47 = utils.get_test_data("47.fa.sig")
--    sig63 = utils.get_test_data("63.fa.sig")
--
--    ss2 = load_one_signature_from_json(sig2, ksize=31)
--    ss47 = load_one_signature_from_json(sig47, ksize=31)
--    ss63 = load_one_signature_from_json(sig63, ksize=31)
--
--    # construct an index...
--    lidx = RevIndex(template=ss2.minhash, signatures=[ss2, ss47, ss63])
--
--    # ...now search with something that should ignore sig47, the exact match.
--    search_fn = JaccardSearchBestOnly_ButIgnore([ss47])
--
--    results = list(lidx.find(search_fn, ss47))
--    results = [ss.signature for ss in results]
--
--    def is_found(ss, xx):
--        for q in xx:
--            print(ss, ss.similarity(q))
--            if ss.similarity(q) == 1.0:
--                return True
--        return False
--
--    assert not is_found(ss47, results)
--    assert not is_found(ss2, results)
--    assert is_found(ss63, results)
--
--
- def test_standalone_manifest_signatures(runtmp):
-     # build a StandaloneManifestIndex and test 'signatures' method.
- 
---- sourmash.orig/tests/test_index_protocol.py
-+++ sourmash/tests/test_index_protocol.py
-@@ -18,7 +18,6 @@
- )
- from sourmash.index import CounterGather
- from sourmash.index.sqlite_index import SqliteIndex
--from sourmash.index.revindex import RevIndex
- from sourmash.sbt import SBT, GraphFactory
- from sourmash.manifest import CollectionManifest, BaseCollectionManifest
- from sourmash.lca.lca_db import LCA_Database, load_single_database
-@@ -147,17 +146,6 @@
-     return db
- 
- 
--def build_revindex(runtmp):
--    ss2, ss47, ss63 = _load_three_sigs()
--
--    lidx = RevIndex(template=ss2.minhash)
--    lidx.insert(ss2)
--    lidx.insert(ss47)
--    lidx.insert(ss63)
--
--    return lidx
--
--
- def build_lca_index_save_load_sql(runtmp):
-     db = build_lca_index(runtmp)
-     outfile = runtmp.output("db.lca.json")


=====================================
debian/patches/skip-branchwater-feature
=====================================
@@ -46,3 +46,732 @@ Forwarded: not-needed
 +#rocksdb = { version = "0.21.0", optional = true }
  [target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies]
  criterion = "0.5.1"
+--- sourmash.orig/include/sourmash.h
++++ sourmash/include/sourmash.h
+@@ -56,8 +56,6 @@
+ 
+ typedef struct SourmashNodegraph SourmashNodegraph;
+ 
+-typedef struct SourmashRevIndex SourmashRevIndex;
+-
+ typedef struct SourmashSearchResult SourmashSearchResult;
+ 
+ typedef struct SourmashSignature SourmashSignature;
+@@ -312,43 +310,6 @@
+                                          uintptr_t starting_size,
+                                          uintptr_t n_tables);
+ 
+-void revindex_free(SourmashRevIndex *ptr);
+-
+-const SourmashSearchResult *const *revindex_gather(const SourmashRevIndex *ptr,
+-                                                   const SourmashSignature *sig_ptr,
+-                                                   double threshold,
+-                                                   bool _do_containment,
+-                                                   bool _ignore_abundance,
+-                                                   uintptr_t *size);
+-
+-uint64_t revindex_len(const SourmashRevIndex *ptr);
+-
+-SourmashRevIndex *revindex_new_with_paths(const SourmashStr *const *search_sigs_ptr,
+-                                          uintptr_t insigs,
+-                                          const SourmashKmerMinHash *template_ptr,
+-                                          uintptr_t threshold,
+-                                          const SourmashKmerMinHash *const *queries_ptr,
+-                                          uintptr_t inqueries,
+-                                          bool keep_sigs);
+-
+-SourmashRevIndex *revindex_new_with_sigs(const SourmashSignature *const *search_sigs_ptr,
+-                                         uintptr_t insigs,
+-                                         const SourmashKmerMinHash *template_ptr,
+-                                         uintptr_t threshold,
+-                                         const SourmashKmerMinHash *const *queries_ptr,
+-                                         uintptr_t inqueries);
+-
+-uint64_t revindex_scaled(const SourmashRevIndex *ptr);
+-
+-const SourmashSearchResult *const *revindex_search(const SourmashRevIndex *ptr,
+-                                                   const SourmashSignature *sig_ptr,
+-                                                   double threshold,
+-                                                   bool do_containment,
+-                                                   bool _ignore_abundance,
+-                                                   uintptr_t *size);
+-
+-SourmashSignature **revindex_signatures(const SourmashRevIndex *ptr, uintptr_t *size);
+-
+ SourmashStr searchresult_filename(const SourmashSearchResult *ptr);
+ 
+ void searchresult_free(SourmashSearchResult *ptr);
+--- sourmash.orig/src/core/cbindgen.toml
++++ sourmash/src/core/cbindgen.toml
+@@ -8,7 +8,6 @@
+ 
+ [parse.expand]
+ crates = ["sourmash"]
+-features = ["branchwater"]
+ 
+ [enum]
+ rename_variants = "QualifiedScreamingSnakeCase"
+--- sourmash.orig/src/core/src/ffi/index/revindex.rs
++++ sourmash/src/core/src/ffi/index/revindex.rs
+@@ -1,271 +0,0 @@
+-use std::slice;
+-
+-use camino::Utf8PathBuf as PathBuf;
+-
+-use crate::ffi::index::SourmashSearchResult;
+-use crate::ffi::minhash::SourmashKmerMinHash;
+-use crate::ffi::signature::SourmashSignature;
+-use crate::ffi::utils::{ForeignObject, SourmashStr};
+-use crate::index::revindex::mem_revindex::RevIndex;
+-use crate::index::Index;
+-use crate::prelude::*;
+-use crate::signature::{Signature, SigsTrait};
+-use crate::sketch::minhash::KmerMinHash;
+-use crate::sketch::Sketch;
+-
+-pub struct SourmashRevIndex;
+-
+-impl ForeignObject for SourmashRevIndex {
+-    type RustObject = RevIndex;
+-}
+-
+-// TODO: remove this when it is possible to pass Selection thru the FFI
+-fn from_template(template: &Sketch) -> Selection {
+-    let (num, scaled) = match template {
+-        Sketch::MinHash(mh) => (mh.num(), mh.scaled() as u32),
+-        Sketch::LargeMinHash(mh) => (mh.num(), mh.scaled() as u32),
+-        _ => unimplemented!(),
+-    };
+-
+-    Selection::builder()
+-        .ksize(template.ksize() as u32)
+-        .num(num)
+-        .scaled(scaled)
+-        .build()
+-}
+-
+-ffi_fn! {
+-unsafe fn revindex_new_with_paths(
+-    search_sigs_ptr: *const *const SourmashStr,
+-    insigs: usize,
+-    template_ptr: *const SourmashKmerMinHash,
+-    threshold: usize,
+-    queries_ptr: *const *const SourmashKmerMinHash,
+-    inqueries: usize,
+-    keep_sigs: bool,
+-) -> Result<*mut SourmashRevIndex> {
+-    let search_sigs: Vec<PathBuf> = {
+-        assert!(!search_sigs_ptr.is_null());
+-        slice::from_raw_parts(search_sigs_ptr, insigs)
+-            .iter()
+-            .map(|path| {
+-                let mut new_path = PathBuf::new();
+-                new_path.push(SourmashStr::as_rust(*path).as_str());
+-                new_path
+-            })
+-            .collect()
+-    };
+-
+-    let template = {
+-        assert!(!template_ptr.is_null());
+-        //TODO: avoid clone here
+-        Sketch::MinHash(SourmashKmerMinHash::as_rust(template_ptr).clone())
+-    };
+-
+-    let queries_vec: Vec<KmerMinHash>;
+-    let queries: Option<&[KmerMinHash]> = if queries_ptr.is_null() {
+-        None
+-    } else {
+-        queries_vec = slice::from_raw_parts(queries_ptr, inqueries)
+-            .iter()
+-            .map(|mh_ptr|
+-            // TODO: avoid this clone
+-          SourmashKmerMinHash::as_rust(*mh_ptr).clone())
+-            .collect();
+-        Some(queries_vec.as_ref())
+-    };
+-
+-    let selection = from_template(&template);
+-
+-    let revindex = RevIndex::new(
+-        search_sigs.as_ref(),
+-        &selection,
+-        threshold,
+-        queries,
+-        keep_sigs,
+-    )?;
+-    Ok(SourmashRevIndex::from_rust(revindex))
+-}
+-}
+-
+-ffi_fn! {
+-unsafe fn revindex_new_with_sigs(
+-    search_sigs_ptr: *const *const SourmashSignature,
+-    insigs: usize,
+-    template_ptr: *const SourmashKmerMinHash,
+-    threshold: usize,
+-    queries_ptr: *const *const SourmashKmerMinHash,
+-    inqueries: usize,
+-) -> Result<*mut SourmashRevIndex> {
+-    let search_sigs: Vec<Signature> = {
+-        assert!(!search_sigs_ptr.is_null());
+-        slice::from_raw_parts(search_sigs_ptr, insigs)
+-            .iter()
+-            .map(|sig| SourmashSignature::as_rust(*sig))
+-            .cloned()
+-            .collect()
+-    };
+-
+-    let template = {
+-        assert!(!template_ptr.is_null());
+-        //TODO: avoid clone here
+-        Sketch::MinHash(SourmashKmerMinHash::as_rust(template_ptr).clone())
+-    };
+-
+-    let queries_vec: Vec<KmerMinHash>;
+-    let queries: Option<&[KmerMinHash]> = if queries_ptr.is_null() {
+-        None
+-    } else {
+-        queries_vec = slice::from_raw_parts(queries_ptr, inqueries)
+-            .iter()
+-            .map(|mh_ptr|
+-            // TODO: avoid this clone
+-          SourmashKmerMinHash::as_rust(*mh_ptr).clone())
+-            .collect();
+-        Some(queries_vec.as_ref())
+-    };
+-
+-    let selection = from_template(&template);
+-    let revindex = RevIndex::new_with_sigs(search_sigs, &selection, threshold, queries)?;
+-    Ok(SourmashRevIndex::from_rust(revindex))
+-}
+-}
+-
+-#[no_mangle]
+-pub unsafe extern "C" fn revindex_free(ptr: *mut SourmashRevIndex) {
+-    SourmashRevIndex::drop(ptr);
+-}
+-
+-ffi_fn! {
+-unsafe fn revindex_search(
+-    ptr: *const SourmashRevIndex,
+-    sig_ptr: *const SourmashSignature,
+-    threshold: f64,
+-    do_containment: bool,
+-    _ignore_abundance: bool,
+-    size: *mut usize,
+-) -> Result<*const *const SourmashSearchResult> {
+-    let revindex = SourmashRevIndex::as_rust(ptr);
+-    let sig = SourmashSignature::as_rust(sig_ptr);
+-
+-    if sig.signatures.is_empty() {
+-        *size = 0;
+-        return Ok(std::ptr::null::<*const SourmashSearchResult>());
+-    }
+-
+-    let mh = if let Sketch::MinHash(mh) = &sig.signatures[0] {
+-        mh
+-    } else {
+-        // TODO: what if it is not a mh?
+-        unimplemented!()
+-    };
+-
+-    let results: Vec<(f64, Signature, String)> = revindex
+-        .find_signatures(mh, threshold, do_containment, true)?
+-        .into_iter()
+-        .collect();
+-
+-    // FIXME: use the ForeignObject trait, maybe define new method there...
+-    let ptr_sigs: Vec<*const SourmashSearchResult> = results
+-        .into_iter()
+-        .map(|x| Box::into_raw(Box::new(x)) as *const SourmashSearchResult)
+-        .collect();
+-
+-    let b = ptr_sigs.into_boxed_slice();
+-    *size = b.len();
+-
+-    Ok(Box::into_raw(b) as *const *const SourmashSearchResult)
+-}
+-}
+-
+-ffi_fn! {
+-unsafe fn revindex_gather(
+-    ptr: *const SourmashRevIndex,
+-    sig_ptr: *const SourmashSignature,
+-    threshold: f64,
+-    _do_containment: bool,
+-    _ignore_abundance: bool,
+-    size: *mut usize,
+-) -> Result<*const *const SourmashSearchResult> {
+-    let revindex = SourmashRevIndex::as_rust(ptr);
+-    let sig = SourmashSignature::as_rust(sig_ptr);
+-
+-    if sig.signatures.is_empty() {
+-        *size = 0;
+-        return Ok(std::ptr::null::<*const SourmashSearchResult>());
+-    }
+-
+-    let mh = if let Sketch::MinHash(mh) = &sig.signatures[0] {
+-        mh
+-    } else {
+-        // TODO: what if it is not a mh?
+-        unimplemented!()
+-    };
+-
+-    // TODO: proper threshold calculation
+-    let threshold: usize = (threshold * (mh.size() as f64)) as _;
+-
+-    let counter = revindex.counter_for_query(mh);
+-    dbg!(&counter);
+-
+-    let results: Vec<(f64, Signature, String)> = revindex
+-        .gather(counter, threshold, mh)
+-        .unwrap() // TODO: proper error handling
+-        .into_iter()
+-        .map(|r| {
+-            let filename = r.filename().to_owned();
+-            let sig = r.get_match();
+-            (r.f_match(), sig, filename)
+-        })
+-        .collect();
+-
+-    // FIXME: use the ForeignObject trait, maybe define new method there...
+-    let ptr_sigs: Vec<*const SourmashSearchResult> = results
+-        .into_iter()
+-        .map(|x| Box::into_raw(Box::new(x)) as *const SourmashSearchResult)
+-        .collect();
+-
+-    let b = ptr_sigs.into_boxed_slice();
+-    *size = b.len();
+-
+-    Ok(Box::into_raw(b) as *const *const SourmashSearchResult)
+-}
+-}
+-
+-#[no_mangle]
+-pub unsafe extern "C" fn revindex_scaled(ptr: *const SourmashRevIndex) -> u64 {
+-    let revindex = SourmashRevIndex::as_rust(ptr);
+-    if let Sketch::MinHash(mh) = revindex.template() {
+-        mh.scaled()
+-    } else {
+-        unimplemented!()
+-    }
+-}
+-
+-#[no_mangle]
+-pub unsafe extern "C" fn revindex_len(ptr: *const SourmashRevIndex) -> u64 {
+-    let revindex = SourmashRevIndex::as_rust(ptr);
+-    revindex.len() as u64
+-}
+-
+-ffi_fn! {
+-unsafe fn revindex_signatures(
+-    ptr: *const SourmashRevIndex,
+-    size: *mut usize,
+-) -> Result<*mut *mut SourmashSignature> {
+-    let revindex = SourmashRevIndex::as_rust(ptr);
+-
+-    let sigs = revindex.signatures();
+-
+-    // FIXME: use the ForeignObject trait, maybe define new method there...
+-    let ptr_sigs: Vec<*mut SourmashSignature> = sigs
+-        .into_iter()
+-        .map(|x| Box::into_raw(Box::new(x)) as *mut SourmashSignature)
+-        .collect();
+-
+-    let b = ptr_sigs.into_boxed_slice();
+-    *size = b.len();
+-
+-    Ok(Box::into_raw(b) as *mut *mut SourmashSignature)
+-}
+-}
+--- sourmash.orig/src/sourmash/index/revindex.py
++++ sourmash/src/sourmash/index/revindex.py
+@@ -1,240 +1,3 @@
+-"""
+-RevIndex - a rust-based reverse index by hashes.
+-"""
+-
+-import weakref
+-
+-from sourmash.index import Index, IndexSearchResult
+-from sourmash.minhash import MinHash
+-from sourmash.signature import SourmashSignature
+-from sourmash._lowlevel import ffi, lib
+-from sourmash.utils import RustObject, rustcall, decode_str, encode_str
+-
+-
+-class RevIndex(RustObject, Index):
+-    __dealloc_func__ = lib.revindex_free
+-
+-    def __init__(
+-        self,
+-        *,
+-        signatures=None,
+-        signature_paths=None,
+-        template=None,
+-        threshold=0,
+-        queries=None,
+-        keep_sigs=False,
+-    ):
+-        self.template = template
+-        self.threshold = threshold
+-        self.queries = queries
+-        self.keep_sigs = keep_sigs
+-        self.signature_paths = signature_paths
+-        self._signatures = signatures
+-
+-        if signature_paths is None or signatures is None:
+-            # delay initialization
+-            self._objptr = ffi.NULL
+-        else:
+-            self._init_inner()
+-
+-    def _init_inner(self):
+-        if self._objptr != ffi.NULL:
+-            # Already initialized
+-            return
+-
+-        if (
+-            self.signature_paths is None
+-            and not self._signatures
+-            and self._objptr == ffi.NULL
+-        ):
+-            raise ValueError("No signatures provided")
+-        elif (self.signature_paths or self._signatures) and self._objptr != ffi.NULL:
+-            raise NotImplementedError("Need to update RevIndex")
+-
+-        attached_refs = weakref.WeakKeyDictionary()
+-
+-        queries_ptr = ffi.NULL
+-        queries_size = 0
+-        if self.queries:
+-            # get list of rust objects
+-            collected = []
+-            for obj in queries:
+-                rv = obj._get_objptr()
+-                attached_refs[rv] = obj
+-                collected.append(rv)
+-            queries_ptr = ffi.new("SourmashSignature*[]", collected)
+-            queries_size = len(queries)
+-
+-        template_ptr = ffi.NULL
+-        if self.template:
+-            if isinstance(self.template, MinHash):
+-                template_ptr = self.template._get_objptr()
+-            else:
+-                raise ValueError("Template must be a MinHash")
+-
+-        search_sigs_ptr = ffi.NULL
+-        sigs_size = 0
+-        collected = []
+-        if self.signature_paths:
+-            for path in self.signature_paths:
+-                collected.append(encode_str(path))
+-            search_sigs_ptr = ffi.new("SourmashStr*[]", collected)
+-            sigs_size = len(signature_paths)
+-
+-            self._objptr = rustcall(
+-                lib.revindex_new_with_paths,
+-                search_sigs_ptr,
+-                sigs_size,
+-                template_ptr,
+-                self.threshold,
+-                queries_ptr,
+-                queries_size,
+-                self.keep_sigs,
+-            )
+-        elif self._signatures:
+-            # force keep_sigs=True, and pass SourmashSignature directly to RevIndex.
+-            for sig in self._signatures:
+-                collected.append(sig._get_objptr())
+-            search_sigs_ptr = ffi.new("SourmashSignature*[]", collected)
+-            sigs_size = len(self._signatures)
+-
+-            self._objptr = rustcall(
+-                lib.revindex_new_with_sigs,
+-                search_sigs_ptr,
+-                sigs_size,
+-                template_ptr,
+-                self.threshold,
+-                queries_ptr,
+-                queries_size,
+-            )
+-
+-    def signatures(self):
+-        self._init_inner()
+-
+-        size = ffi.new("uintptr_t *")
+-        sigs_ptr = self._methodcall(lib.revindex_signatures, size)
+-        size = size[0]
+-
+-        sigs = []
+-        for i in range(size):
+-            sig = SourmashSignature._from_objptr(sigs_ptr[i])
+-            sigs.append(sig)
+-
+-        for sig in sigs:
+-            yield sig
+-
+-        # if self._signatures:
+-        #    yield from self._signatures
+-        # else:
+-        #    raise NotImplementedError("Call into Rust and retrieve sigs")
+-
+-    def __len__(self):
+-        if self._objptr:
+-            return self._methodcall(lib.revindex_len)
+-        else:
+-            return len(self._signatures)
+-
+-    def insert(self, node):
+-        if self._signatures is None:
+-            self._signatures = []
+-        self._signatures.append(node)
+-
+-    def save(self, path):
+-        pass
+-
+-    @classmethod
+-    def load(cls, location):
+-        pass
+-
+-    def select(self, ksize=None, moltype=None, **kwargs):
+-        if self.template:
+-            if ksize:
+-                self.template.ksize = ksize
+-            if moltype:
+-                self.template.moltype = moltype
+-        else:
+-            # TODO: deal with None/default values
+-            self.template = MinHash(ksize=ksize, moltype=moltype)
+-
+-    #    def search(self, query, *args, **kwargs):
+-    #        """Return set of matches with similarity above 'threshold'.
+-    #
+-    #        Results will be sorted by similarity, highest to lowest.
+-    #
+-    #        Optional arguments:
+-    #          * do_containment: default False. If True, use Jaccard containment.
+-    #          * ignore_abundance: default False. If True, and query signature
+-    #            and database support k-mer abundances, ignore those abundances.
+-    #
+-    #        Note, the "best only" hint is ignored by LCA_Database
+-    #        """
+-    #        if not query.minhash:
+-    #            return []
+-    #
+-    #        # check arguments
+-    #        if "threshold" not in kwargs:
+-    #            raise TypeError("'search' requires 'threshold'")
+-    #        threshold = kwargs["threshold"]
+-    #        do_containment = kwargs.get("do_containment", False)
+-    #        ignore_abundance = kwargs.get("ignore_abundance", False)
+-    #
+-    #        self._init_inner()
+-    #
+-    #        size = ffi.new("uintptr_t *")
+-    #        results_ptr = self._methodcall(
+-    #            lib.revindex_search,
+-    #            query._get_objptr(),
+-    #            threshold,
+-    #            do_containment,
+-    #            ignore_abundance,
+-    #            size,
+-    #        )
+-    #
+-    #        size = size[0]
+-    #        if size == 0:
+-    #            return []
+-    #
+-    #        results = []
+-    #        for i in range(size):
+-    #            match = SearchResult._from_objptr(results_ptr[i])
+-    #            if match.score >= threshold:
+-    #                results.append(IndexSearchResult(match.score, match.signature, match.filename))
+-    #
+-    #        return results
+-    #
+-    #    def gather(self, query, *args, **kwargs):
+-    #        "Return the match with the best Jaccard containment in the database."
+-    #        if not query.minhash:
+-    #            return []
+-    #
+-    #        self._init_inner()
+-    #
+-    #        threshold_bp = kwargs.get("threshold_bp", 0.0)
+-    #        threshold = threshold_bp / (len(query.minhash) * self.scaled)
+-    #
+-    #        results = []
+-    #        size = ffi.new("uintptr_t *")
+-    #        results_ptr = self._methodcall(
+-    #            lib.revindex_gather, query._get_objptr(), threshold, True, True, size
+-    #        )
+-    #        size = size[0]
+-    #        if size == 0:
+-    #            return []
+-    #
+-    #        results = []
+-    #        for i in range(size):
+-    #            match = SearchResult._from_objptr(results_ptr[i])
+-    #            if match.score >= threshold:
+-    #                results.append(IndexSearchResult(match.score, match.signature, match.filename))
+-    #
+-    #        results.sort(reverse=True,
+-    #                     key=lambda x: (x.score, x.signature.md5sum()))
+-    #
+-    #        return results[:1]
+-
+-    @property
+-    def scaled(self):
+-        return self._methodcall(lib.revindex_scaled)
+ 
+ 
+ class SearchResult(RustObject):
+--- sourmash.orig/tests/test_index.py
++++ sourmash/tests/test_index.py
+@@ -20,7 +20,6 @@
+     StandaloneManifestIndex,
+ )
+ from sourmash.signature import load_one_signature_from_json, save_signatures_to_json
+-from sourmash.index.revindex import RevIndex
+ from sourmash.sbt import SBT, GraphFactory
+ from sourmash import sourmash_args
+ from sourmash.search import JaccardSearch, SearchType
+@@ -1812,108 +1811,6 @@
+         assert ss_tup == ss_lazy_tup
+ 
+ 
+-def test_revindex_index_search():
+-    # confirm that RevIndex works
+-    sig2 = utils.get_test_data("2.fa.sig")
+-    sig47 = utils.get_test_data("47.fa.sig")
+-    sig63 = utils.get_test_data("63.fa.sig")
+-
+-    ss2 = load_one_signature_from_json(sig2, ksize=31)
+-    ss47 = load_one_signature_from_json(sig47)
+-    ss63 = load_one_signature_from_json(sig63)
+-
+-    lidx = RevIndex(template=ss2.minhash)
+-    lidx.insert(ss2)
+-    lidx.insert(ss47)
+-    lidx.insert(ss63)
+-
+-    # now, search for sig2
+-    sr = lidx.search(ss2, threshold=1.0)
+-    print([s[1].name for s in sr])
+-    assert len(sr) == 1
+-    assert sr[0][1] == ss2
+-
+-    # search for sig47 with lower threshold; search order not guaranteed.
+-    sr = lidx.search(ss47, threshold=0.1)
+-    print([s[1].name for s in sr])
+-    assert len(sr) == 2
+-    sr.sort(key=lambda x: -x[0])
+-    assert sr[0][1] == ss47
+-    assert sr[1][1] == ss63
+-
+-    # search for sig63 with lower threshold; search order not guaranteed.
+-    sr = lidx.search(ss63, threshold=0.1)
+-    print([s[1].name for s in sr])
+-    assert len(sr) == 2
+-    sr.sort(key=lambda x: -x[0])
+-    assert sr[0][1] == ss63
+-    assert sr[1][1] == ss47
+-
+-    # search for sig63 with high threshold => 1 match
+-    sr = lidx.search(ss63, threshold=0.8)
+-    print([s[1].name for s in sr])
+-    assert len(sr) == 1
+-    sr.sort(key=lambda x: -x[0])
+-    assert sr[0][1] == ss63
+-
+-
+-def test_revindex_gather():
+-    # check that RevIndex.best_containment works.
+-    sig2 = utils.get_test_data("2.fa.sig")
+-    sig47 = utils.get_test_data("47.fa.sig")
+-    sig63 = utils.get_test_data("63.fa.sig")
+-
+-    ss2 = load_one_signature_from_json(sig2, ksize=31)
+-    ss47 = load_one_signature_from_json(sig47)
+-    ss63 = load_one_signature_from_json(sig63)
+-
+-    lidx = RevIndex(template=ss2.minhash)
+-    lidx.insert(ss2)
+-    lidx.insert(ss47)
+-    lidx.insert(ss63)
+-
+-    match = lidx.best_containment(ss2)
+-    assert match
+-    assert match.score == 1.0
+-    assert match.signature == ss2
+-
+-    match = lidx.best_containment(ss47)
+-    assert match
+-    assert match.score == 1.0
+-    assert match.signature == ss47
+-
+-
+-def test_revindex_gather_ignore():
+-    # check that RevIndex gather ignores things properly.
+-    sig2 = utils.get_test_data("2.fa.sig")
+-    sig47 = utils.get_test_data("47.fa.sig")
+-    sig63 = utils.get_test_data("63.fa.sig")
+-
+-    ss2 = load_one_signature_from_json(sig2, ksize=31)
+-    ss47 = load_one_signature_from_json(sig47, ksize=31)
+-    ss63 = load_one_signature_from_json(sig63, ksize=31)
+-
+-    # construct an index...
+-    lidx = RevIndex(template=ss2.minhash, signatures=[ss2, ss47, ss63])
+-
+-    # ...now search with something that should ignore sig47, the exact match.
+-    search_fn = JaccardSearchBestOnly_ButIgnore([ss47])
+-
+-    results = list(lidx.find(search_fn, ss47))
+-    results = [ss.signature for ss in results]
+-
+-    def is_found(ss, xx):
+-        for q in xx:
+-            print(ss, ss.similarity(q))
+-            if ss.similarity(q) == 1.0:
+-                return True
+-        return False
+-
+-    assert not is_found(ss47, results)
+-    assert not is_found(ss2, results)
+-    assert is_found(ss63, results)
+-
+-
+ def test_standalone_manifest_signatures(runtmp):
+     # build a StandaloneManifestIndex and test 'signatures' method.
+ 
+--- sourmash.orig/tests/test_index_protocol.py
++++ sourmash/tests/test_index_protocol.py
+@@ -18,7 +18,6 @@
+ )
+ from sourmash.index import CounterGather
+ from sourmash.index.sqlite_index import SqliteIndex
+-from sourmash.index.revindex import RevIndex
+ from sourmash.sbt import SBT, GraphFactory
+ from sourmash.manifest import CollectionManifest, BaseCollectionManifest
+ from sourmash.lca.lca_db import LCA_Database, load_single_database
+@@ -147,17 +146,6 @@
+     return db
+ 
+ 
+-def build_revindex(runtmp):
+-    ss2, ss47, ss63 = _load_three_sigs()
+-
+-    lidx = RevIndex(template=ss2.minhash)
+-    lidx.insert(ss2)
+-    lidx.insert(ss47)
+-    lidx.insert(ss63)
+-
+-    return lidx
+-
+-
+ def build_lca_index_save_load_sql(runtmp):
+     db = build_lca_index(runtmp)
+     outfile = runtmp.output("db.lca.json")



View it on GitLab: https://salsa.debian.org/med-team/sourmash/-/commit/1e81fd744ffd4e764bbb25648417b9da0701e060

-- 
View it on GitLab: https://salsa.debian.org/med-team/sourmash/-/commit/1e81fd744ffd4e764bbb25648417b9da0701e060
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20241028/5bf5ceed/attachment-0001.htm>


More information about the debian-med-commit mailing list