Bug#999931: virtuoso-opensource: depends on obsolete pcre3 library

Yavor Doganov yavor at gnu.org
Mon Mar 4 14:36:45 GMT 2024


Hi Andreas,

On Wed, 28 Feb 2024 18:36:04 +0200,
Andreas Beckmann wrote:
> On Wed, 20 Dec 2023 21:18:20 +0200 Yavor Doganov <yavor at gnu.org> wrote:
> > Please find attached a patch;
> 
> Thanks for the patch, I uploaded it to Debian and so far no one
> complained ;-)

Thanks!  Complaints usually come a bit later...
 
> But the patch doesn't apply cleanly on newer virtuoso-opensource
> versions, there are actually changes in pcre usage in
> libsrc/Wi/sqlbif.h that require adjustments.

Right; there's a new function.

> Could you take a look again and update the patch?

Attached is a patch (commit made to the try-7.2.12 branch) that
updates pcre2.patch so that it applies cleanly and restores the
build-dependency on libpcre2-dev.

> I've never worked with (any version of) pcre (from the programmer
> perspective,

Likewise, I'm a complete novice here.  It would be nice to finish this
transition, though.
-------------- next part --------------
From 82b97264413540aa72d96297a93a6fd24f56adc2 Mon Sep 17 00:00:00 2001
From: Yavor Doganov <yavor at gnu.org>
Date: Mon, 4 Mar 2024 16:29:34 +0200
Subject: [PATCH] pcre2.patch: Update for the new upstream release

---
 debian/changelog           |   4 +
 debian/control             |   2 +-
 debian/patches/pcre2.patch | 152 ++++++++++++++++++++++++-------------
 debian/patches/series      |   2 +-
 4 files changed, 107 insertions(+), 53 deletions(-)

diff --git a/debian/changelog b/debian/changelog
index 907e89fc6..a566f67f2 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,8 +1,12 @@
 virtuoso-opensource (7.2.12+dfsg-0.1) UNRELEASED; urgency=medium
 
+  [ Andreas Beckmann ]
   * Non-maintainer upload.
   * New upstream release.
 
+  [ Yavor Doganov ]
+  * debian/patches/pcre2.patch: Update for the new upstream release.
+
  -- Andreas Beckmann <anbe at debian.org>  Wed, 28 Feb 2024 15:19:59 +0100
 
 virtuoso-opensource (7.2.5.1+dfsg1-0.6) unstable; urgency=medium
diff --git a/debian/control b/debian/control
index 5fd05fab9..db9b573c4 100644
--- a/debian/control
+++ b/debian/control
@@ -14,7 +14,7 @@ Build-Depends: debhelper-compat (= 13),
                gperf,
                libldap2-dev,
                libmagickwand-dev,
-               libpcre3-dev,
+               libpcre2-dev,
                libreadline-dev,
                libssl-dev,
                libtirpc-dev,
diff --git a/debian/patches/pcre2.patch b/debian/patches/pcre2.patch
index 98c06be51..104c1abc9 100644
--- a/debian/patches/pcre2.patch
+++ b/debian/patches/pcre2.patch
@@ -2,12 +2,12 @@ Description: Port to PCRE2.
 Bug-Debian: https://bugs.debian.org/999931
 Author: Yavor Doganov <yavor at gnu.org>
 Forwarded: no
-Last-Update: 2023-12-20
+Last-Update: 2024-03-04
 ---
 
 --- virtuoso-opensource.orig/libsrc/Wi/Makefile.am
 +++ virtuoso-opensource/libsrc/Wi/Makefile.am
-@@ -559,7 +559,7 @@
+@@ -563,7 +563,7 @@
  	$(libwi_base_la_sources)
  
  libwi_la_CFLAGS  = $(libwi_base_la_cflags)
@@ -18,7 +18,7 @@ Last-Update: 2023-12-20
  libwi_odbc_la_SOURCES += \
 --- virtuoso-opensource.orig/libsrc/Wi/bif_regexp.c
 +++ virtuoso-opensource/libsrc/Wi/bif_regexp.c
-@@ -30,7 +30,8 @@
+@@ -31,7 +31,8 @@
  
  // Debian maintainer: replaced by external PCRE
  // #include "util/pcrelib/pcre.h"
@@ -28,7 +28,7 @@ Last-Update: 2023-12-20
  
  /*
     typedef struct rx_query_s {
-@@ -66,15 +67,16 @@
+@@ -65,16 +66,17 @@
  typedef struct compiled_regexp_s
  {
    int refctr;
@@ -42,14 +42,16 @@ Last-Update: 2023-12-20
  
 -int32 c_pcre_match_limit_recursion = 500;
 -int32 c_pcre_match_limit = 100000;
+-int32 pcre_max_cache_sz = 20000;
 +static pcre2_match_context *match_ctxt = NULL;
 +
 +uint32 c_pcre_match_limit_recursion = 500;
 +uint32 c_pcre_match_limit = 100000;
- int32 pcre_max_cache_sz = 20000;
++uint32 pcre_max_cache_sz = 20000;
  int32 pcre_rnd_seed;
  
-@@ -97,6 +99,23 @@
+ id_hashed_key_t
+@@ -96,6 +98,23 @@
  }
  
  void
@@ -73,7 +75,7 @@ Last-Update: 2023-12-20
  release_compiled_regexp (id_hash_t *c_r, compiled_regexp_t *data)
  {
    int delete_data;
-@@ -112,9 +131,7 @@
+@@ -111,9 +130,7 @@
    if (!delete_data)
      return;
    if (NULL != data->code)
@@ -84,7 +86,7 @@ Last-Update: 2023-12-20
    dk_free (data, sizeof (compiled_regexp_t));
  }
  
-@@ -137,10 +154,11 @@
+@@ -136,10 +153,11 @@
  }
  
  static compiled_regexp_t *
@@ -99,9 +101,9 @@ Last-Update: 2023-12-20
    regexp_key_t key;
    compiled_regexp_t **val = NULL;
    compiled_regexp_t tmp, *new_val;
-@@ -156,46 +174,18 @@
+@@ -155,46 +173,18 @@
      }
-   HT_LEAVE (c_r);
+   HT_UNLOCK (c_r);
    dbg_printf (("regex compiling (%s) with options %x ...\n", pattern, options));
 -  tmp.code = pcre_compile (pattern, options, &error, &erroff, 0);
 +  tmp.code = pcre2_compile ((PCRE2_SPTR) pattern, strlen (pattern),
@@ -149,9 +151,9 @@ Last-Update: 2023-12-20
    new_val->code = tmp.code;
 -  new_val->code_x = tmp.code_x;
    new_val->refctr = 1;
-   HT_ENTER (c_r);
+   HT_WRLOCK (c_r);
    pcre_cache_check (c_r);
-@@ -302,18 +292,18 @@
+@@ -301,18 +291,18 @@
  }
  
  
@@ -175,7 +177,7 @@ Last-Update: 2023-12-20
  /*
  #define PCRE_EXTENDED           0x0008
  #define PCRE_ANCHORED           0x0010
-@@ -324,7 +314,7 @@
+@@ -323,7 +313,7 @@
  #define PCRE_UNGREEDY           0x0200
  #define PCRE_NOTEMPTY           0x0400
          */
@@ -184,7 +186,7 @@ Last-Update: 2023-12-20
          /*
  #define PCRE_NO_AUTO_CAPTURE    0x1000
          */
-@@ -338,15 +328,16 @@
+@@ -337,15 +327,16 @@
  bif_regexp_match (caddr_t * qst, caddr_t * err_ret, state_slot_t ** args)
  {
    int utf8_mode;
@@ -204,7 +206,7 @@ Last-Update: 2023-12-20
    int result;
  
    utf8_mode = 0;
-@@ -364,7 +355,7 @@
+@@ -363,7 +354,7 @@
    if (*err_ret) goto done;
  
    if (utf8_mode)
@@ -213,7 +215,7 @@ Last-Update: 2023-12-20
  
    if (!pattern || !str)
      goto done;
-@@ -373,10 +364,14 @@
+@@ -372,10 +363,14 @@
    if (err_ret[0])
      goto done;
  
@@ -230,7 +232,7 @@ Last-Update: 2023-12-20
        ret_str = dk_alloc_box (offvect[1] - offvect[0] + 1, DV_SHORT_STRING);
        strncpy (ret_str, str + offvect[0], offvect[1] - offvect[0]);
        ret_str[offvect[1] - offvect[0]] = 0;
-@@ -417,6 +412,7 @@
+@@ -416,6 +411,7 @@
  
  done:
    release_compiled_regexp (compiled_regexps, cd_info);
@@ -238,7 +240,7 @@ Last-Update: 2023-12-20
    if (*err_ret)
      dk_free_box (ret_str);
    dk_free_tree (p_to_free);
-@@ -428,15 +424,15 @@
+@@ -427,15 +423,15 @@
  bif_rdf_regex_impl (caddr_t * qst, caddr_t * err_ret, state_slot_t ** args)
  {
    int utf8_mode = 1;
@@ -257,7 +259,7 @@ Last-Update: 2023-12-20
    switch ((BOX_ELEMENTS (args)))
      {
      default:
-@@ -449,7 +445,7 @@
+@@ -448,7 +444,7 @@
    if (err) goto done;
  
    if (utf8_mode)
@@ -266,7 +268,7 @@ Last-Update: 2023-12-20
  
    if (!pattern || !str)
      goto done;
-@@ -457,11 +453,15 @@
+@@ -456,11 +452,15 @@
    cd_info = get_compiled_regexp (compiled_regexps, pattern, c_opts, err_ret);
    if (err_ret[0])
      goto done;
@@ -284,7 +286,7 @@ Last-Update: 2023-12-20
    if (err)
      dk_free_tree (err);
    dk_free_tree (p_to_free);
-@@ -477,13 +477,14 @@
+@@ -476,13 +476,14 @@
    char *pattern;
    char *str;
    int offset;
@@ -302,7 +304,7 @@ Last-Update: 2023-12-20
  
    utf8_mode = 0;
    pattern = bif_regexp_str_arg (qst, args, 0, "regexp_substr", REGEXP_BF, &utf8_mode, &p_to_free, err_ret);
-@@ -504,14 +505,20 @@
+@@ -503,14 +504,20 @@
    if (NULL != err_ret[0])
      goto done;
  
@@ -326,7 +328,7 @@ Last-Update: 2023-12-20
        if (rc < 0)
          {
            *err_ret = srv_make_new_error ("2201B", "SR097",
-@@ -522,15 +529,21 @@
+@@ -521,15 +528,21 @@
          {
            if (utf8_mode)
              {
@@ -349,7 +351,7 @@ Last-Update: 2023-12-20
    if (*err_ret)
      dk_free_box (ret_str);
    dk_free_tree (p_to_free);
-@@ -539,7 +552,7 @@
+@@ -538,10 +551,10 @@
  }
  
  ptrlong *
@@ -357,8 +359,21 @@ Last-Update: 2023-12-20
 +regexp_offvect_to_array_of_long (utf8char *str, PCRE2_SIZE *offvect, int result, int utf8_mode)
  {
    int i, idx_to_fill;
-   int prev_ofs, ofs, prev_wide_len;
-@@ -648,11 +661,13 @@
+-  int prev_ofs, ofs, prev_wide_len;
++  PCRE2_SIZE prev_ofs, ofs, prev_wide_len;
+   dk_set_t skipped_i = NULL;
+   ptrlong *ret_vec;
+   virt_mbstate_t mb;
+@@ -576,7 +589,7 @@
+       int next_nonnegative_ofs_i = i + 1;
+       while (next_nonnegative_ofs_i < result)
+         {
+-          if (0 > offvect[next_nonnegative_ofs_i])
++          if (PCRE2_UNSET == offvect[next_nonnegative_ofs_i])
+             {
+               next_nonnegative_ofs_i++;
+               continue;
+@@ -647,11 +660,13 @@
    int utf8_mode, utf8_mode2;
    char *pattern = NULL;
    char *str = NULL;
@@ -375,7 +390,7 @@ Last-Update: 2023-12-20
    ptrlong *ret_vec = NULL;
    dk_set_t ret_revlist = NULL;
  
-@@ -680,13 +695,16 @@
+@@ -679,13 +694,16 @@
    if (NULL != err_ret[0])
      goto done;
  
@@ -395,7 +410,7 @@ Last-Update: 2023-12-20
            if (0 >= result)
              break;
            ret_vec = regexp_offvect_to_array_of_long ((utf8char *)str, offvect, result, utf8_mode);
-@@ -699,13 +717,15 @@
+@@ -698,13 +716,15 @@
      }
    else
      {
@@ -413,7 +428,7 @@ Last-Update: 2023-12-20
    dk_free_tree (p_to_free);
    dk_free_tree (str_to_free);
    if (*err_ret)
-@@ -990,7 +1010,10 @@
+@@ -989,7 +1009,10 @@
  static caddr_t
  bif_regexp_version (caddr_t * qst, caddr_t * err_ret, state_slot_t ** args)
  {
@@ -424,8 +439,36 @@ Last-Update: 2023-12-20
 +  return box_dv_short_string(ver);
  }
  
- void
-@@ -1013,21 +1036,26 @@
+ /*
+@@ -1199,10 +1222,10 @@
+ {
+   static compiled_regexp_t *cd_iri, *cd_abs, *cd_ref;
+   compiled_regexp_t *cd_info = NULL;
+-  int r_opts = 0, c_opts = PCRE_UTF8 | PCRE_CASELESS;
+-  int offvect[NOFFSETS];
++  uint32_t r_opts = 0, c_opts = PCRE2_UTF | PCRE2_CASELESS;
++  pcre2_match_data *md;
+   int result;
+-  int str_len = (int) strlen (str);
++  PCRE2_SIZE str_len = strlen (str);
+   caddr_t pattern;
+ 
+   cd_info = (RX_IRI_REF == what ? cd_ref : (RX_IRI == what ? cd_iri : cd_abs));
+@@ -1226,8 +1249,11 @@
+               break;
+         }
+     }
+-  memset (offvect, -1, NOFFSETS * sizeof (int));
+-  result = pcre_exec (cd_info->code, cd_info->code_x, str, str_len, 0, r_opts, offvect, NOFFSETS);
++  create_match_context ();
++  md = pcre2_match_data_create (NOFFSETS, NULL);
++  result = pcre2_match (cd_info->code, (PCRE2_SPTR) str, str_len, 0, r_opts,
++                        md, match_ctxt);
++  pcre2_match_data_free (md);
+   return (result != -1 ? 1 : 0);
+ }
+ 
+@@ -1268,21 +1294,26 @@
  
  /* internal functions for internal usage in Virtuoso */
  caddr_t
@@ -457,7 +500,7 @@ Last-Update: 2023-12-20
    release_compiled_regexp (compiled_regexps, cd_info);
    if (result != -1)
      {
-@@ -1041,14 +1069,15 @@
+@@ -1296,14 +1327,15 @@
  
  
  caddr_t
@@ -477,7 +520,7 @@ Last-Update: 2023-12-20
    if (NULL == cd_info)
      {
        cd_info = get_compiled_regexp (compiled_regexps, pattern, c_opts, &err);
-@@ -1056,8 +1085,12 @@
+@@ -1311,8 +1343,12 @@
          sqlr_resignal (err);
        ret[0] = cd_info;
      }
@@ -492,7 +535,7 @@ Last-Update: 2023-12-20
    if (result != -1)
      return box_dv_short_nchars (str + offvect[0], offvect[1] - offvect[0]);
    return NULL;
-@@ -1068,21 +1101,21 @@
+@@ -1323,26 +1359,26 @@
    char	mc;
    int	opt;
  } regexp_mode_table[] = {
@@ -520,7 +563,13 @@ Last-Update: 2023-12-20
    if (!mode)
      return 0;
    while (mode_char[0])
-@@ -1117,18 +1150,24 @@
+     {
+-      int i;
++      size_t i;
+       for (i=0;i<regexp_mode_table_l;i++)
+ 	{
+ 	  if (regexp_mode_table[i].mc == mode_char[0])
+@@ -1372,18 +1408,24 @@
  */
  
  int
@@ -550,7 +599,7 @@ Last-Update: 2023-12-20
    release_compiled_regexp (compiled_regexps, cd_info);
    return result;
  }
-@@ -1142,11 +1181,12 @@
+@@ -1397,11 +1439,12 @@
  */
  
  caddr_t
@@ -567,7 +616,7 @@ Last-Update: 2023-12-20
    int result;
    caddr_t ret_str;
    caddr_t err = NULL;
-@@ -1154,15 +1194,19 @@
+@@ -1409,15 +1452,19 @@
    if (err)
      sqlr_resignal (err);
  
@@ -590,7 +639,7 @@ Last-Update: 2023-12-20
      }
    else
      {
-@@ -1171,5 +1215,6 @@
+@@ -1426,5 +1473,6 @@
          next[0] = -1;
      }
    release_compiled_regexp (compiled_regexps, cd_info);
@@ -599,7 +648,7 @@ Last-Update: 2023-12-20
  }
 --- virtuoso-opensource.orig/libsrc/Wi/srvstat.c
 +++ virtuoso-opensource/libsrc/Wi/srvstat.c
-@@ -414,8 +414,8 @@
+@@ -419,8 +419,8 @@
  extern int dbf_assert_on_malformed_data;
  extern int dbf_max_itc_samples;
  
@@ -608,21 +657,22 @@ Last-Update: 2023-12-20
 +extern uint32 c_pcre_match_limit;
 +extern uint32 c_pcre_match_limit_recursion;
  extern int32 pcre_max_cache_sz;
- 
- extern int32 shcompo_max_cache_sz;
+ extern int64 users_cache_sz;
+ extern int32 enable_cpt_rb_ck;
 --- virtuoso-opensource.orig/libsrc/Wi/sqlbif.h
 +++ virtuoso-opensource/libsrc/Wi/sqlbif.h
-@@ -314,11 +314,11 @@
+@@ -324,12 +324,12 @@
  extern caddr_t file_native_name_from_iri_path_nchars (const char *iri_path, size_t iri_path_len);
  caddr_t get_ssl_error_text (char *buf, int len);
  
 -caddr_t regexp_match_01 (const char *pattern, const char *str, int c_opts);
 -caddr_t regexp_match_01_const (const char* pattern, const char* str, int c_opts, void ** compiled_ret);
++caddr_t regexp_match_01 (const char *pattern, const char *str, uint32 c_opts);
++caddr_t regexp_match_01_const (const char* pattern, const char* str, uint32_t c_opts, void ** compiled_ret);
+ int regexp_match_iri_const (int what, const char* str, caddr_t *err_ret);
 -caddr_t regexp_split_match (const char* pattern, const char* str, int* next, int c_opts);
 -int regexp_make_opts (const char* mode);
 -int regexp_split_parse (const char* pattern, const char* str, int* offvect, int offvect_sz, int c_opts);
-+caddr_t regexp_match_01 (const char *pattern, const char *str, uint32 c_opts);
-+caddr_t regexp_match_01_const (const char* pattern, const char* str, uint32_t c_opts, void ** compiled_ret);
 +caddr_t regexp_split_match (const char* pattern, const char* str, int* next, uint32_t c_opts);
 +uint32_t regexp_make_opts (const char* mode);
 +int regexp_split_parse (const char* pattern, const char* str, size_t* offvect, uint32_t offvect_sz, uint32_t c_opts);
@@ -641,7 +691,7 @@ Last-Update: 2023-12-20
  
  #define ecm_isname(c) \
    ( ((c) & ~0xFF) ? (ecm_utf8props[(c)] & ECM_ISNAME) : \
-@@ -1540,7 +1541,7 @@
+@@ -1558,7 +1559,7 @@
  
  
  static void
@@ -650,7 +700,7 @@ Last-Update: 2023-12-20
  {
    caddr_t pre_res;
    int next;
-@@ -1552,13 +1553,13 @@
+@@ -1570,13 +1571,13 @@
      sqlr_new_error ("42001", "XRQ??", "invalid regular expression");
  }
  
@@ -667,7 +717,7 @@ Last-Update: 2023-12-20
    return c_opts;
  }
  
-@@ -1570,7 +1571,7 @@
+@@ -1588,7 +1589,7 @@
    int next = 1;
    caddr_t str_inx = str;
    dk_set_t res_set = 0;
@@ -676,7 +726,7 @@ Last-Update: 2023-12-20
  
    c_opts=xqf_make_regexp_modes (flag);
    xqf_check_regexp (pattern, c_opts);
-@@ -1628,7 +1629,7 @@
+@@ -1646,7 +1647,7 @@
  xqf_matches (xp_instance_t * xqi, XT * tree, xml_entity_t * ctx_xe)
  {
    caddr_t val1, val2, val3 = NULL;
@@ -685,7 +735,7 @@ Last-Update: 2023-12-20
  
    if (tree->_.xp_func.argcount)
      {
-@@ -2003,7 +2004,7 @@
+@@ -2021,7 +2022,7 @@
  #define XQF_REPL_OK	0
  
  static int
@@ -694,7 +744,7 @@ Last-Update: 2023-12-20
  {
    int repl_sz = box_length (replacement) - 1;
    int idx = 0;
-@@ -2042,7 +2043,7 @@
+@@ -2060,7 +2061,7 @@
    caddr_t pattern = xpf_arg (xqi, tree, ctx_xe, DV_STRING, 1);
    caddr_t replacement = xpf_arg (xqi, tree, ctx_xe, DV_STRING, 2);
    caddr_t flag = 0;
@@ -703,7 +753,7 @@ Last-Update: 2023-12-20
    if (tree->_.xp_func.argcount > 3)
      flag = xpf_arg (xqi, tree, ctx_xe, DV_STRING, 3);
  
-@@ -2050,7 +2051,7 @@
+@@ -2068,7 +2069,7 @@
    xqf_check_regexp (pattern, c_opts);
  
    {
diff --git a/debian/patches/series b/debian/patches/series
index 395a224ed..0c9e456f6 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -21,4 +21,4 @@ cil.patch
 wbxml2.patch
 tirpc.patch
 reproducible.patch
-#pcre2.patch
+pcre2.patch
-- 
2.43.0



More information about the debian-science-maintainers mailing list