Bug#999931: virtuoso-opensource: depends on obsolete pcre3 library

Yavor Doganov yavor at gnu.org
Wed Dec 20 19:18:20 GMT 2023


Control: tags -1 + patch

Please find attached a patch. It has only been build-tested, although
the package's own test suite includes some checks that exercise this
functionality.
-------------- next part --------------
Description: Port to PCRE2.
Bug-Debian: https://bugs.debian.org/999931
Author: Yavor Doganov <yavor at gnu.org>
Forwarded: no
Last-Update: 2023-12-20
---

--- virtuoso-opensource.orig/libsrc/Wi/Makefile.am
+++ virtuoso-opensource/libsrc/Wi/Makefile.am
@@ -559,7 +559,7 @@
 	$(libwi_base_la_sources)
 
 libwi_la_CFLAGS  = $(libwi_base_la_cflags)
-libwi_la_LDLAGS  = -static -lminizip -lpcre
+libwi_la_LDLAGS  = -static -lminizip -lpcre2-8
 
 #KUBL_UNIV_FILES_ODBC
 libwi_odbc_la_SOURCES += \
--- virtuoso-opensource.orig/libsrc/Wi/bif_regexp.c
+++ virtuoso-opensource/libsrc/Wi/bif_regexp.c
@@ -30,7 +30,8 @@
 
 // Debian maintainer: replaced by external PCRE
 // #include "util/pcrelib/pcre.h"
-#include "pcre.h"
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
 
 /*
    typedef struct rx_query_s {
@@ -66,15 +67,16 @@
 typedef struct compiled_regexp_s
 {
   int refctr;
-  pcre *code;
-  pcre_extra *code_x;
+  pcre2_code *code;
 }
 compiled_regexp_t;
 
 id_hash_t *compiled_regexps;
 
-int32 c_pcre_match_limit_recursion = 500;
-int32 c_pcre_match_limit = 100000;
+static pcre2_match_context *match_ctxt = NULL;
+
+uint32 c_pcre_match_limit_recursion = 500;
+uint32 c_pcre_match_limit = 100000;
 int32 pcre_max_cache_sz = 20000;
 int32 pcre_rnd_seed;
 
@@ -97,6 +99,23 @@
 }
 
 void
+create_match_context (void)
+{
+  if (NULL != match_ctxt)
+    return;
+
+  match_ctxt = pcre2_match_context_create (NULL);
+  if (c_pcre_match_limit > 0)
+    {
+      pcre2_set_match_limit (match_ctxt, c_pcre_match_limit);
+    }
+  if (c_pcre_match_limit_recursion > 0)
+    {
+      pcre2_set_depth_limit (match_ctxt, c_pcre_match_limit_recursion);
+    }
+}
+
+void
 release_compiled_regexp (id_hash_t *c_r, compiled_regexp_t *data)
 {
   int delete_data;
@@ -112,9 +131,7 @@
   if (!delete_data)
     return;
   if (NULL != data->code)
-    pcre_free (data->code);
-  if (NULL != data->code_x)
-    pcre_free (data->code_x);
+    pcre2_code_free (data->code);
   dk_free (data, sizeof (compiled_regexp_t));
 }
 
@@ -137,10 +154,11 @@
 }
 
 static compiled_regexp_t *
-get_compiled_regexp (id_hash_t *c_r, const char *pattern, int options, caddr_t *err_ret)
+get_compiled_regexp (id_hash_t *c_r, const char *pattern, uint32_t options, caddr_t *err_ret)
 {
-  const char *error = NULL;
-  int erroff;
+  int err;
+  PCRE2_UCHAR error[120];
+  PCRE2_SIZE erroff;
   regexp_key_t key;
   compiled_regexp_t **val = NULL;
   compiled_regexp_t tmp, *new_val;
@@ -156,46 +174,18 @@
     }
   HT_LEAVE (c_r);
   dbg_printf (("regex compiling (%s) with options %x ...\n", pattern, options));
-  tmp.code = pcre_compile (pattern, options, &error, &erroff, 0);
+  tmp.code = pcre2_compile ((PCRE2_SPTR) pattern, strlen (pattern),
+                            options, &err, &erroff, NULL);
   if (NULL == tmp.code)
     {
-      if (error)
-        err_ret[0] = srv_make_new_error ("2201B",
-            "SR098", "regexp error at \'%s\' column %d (%s)", pattern, erroff, error);
-      else
-        err_ret[0] = srv_make_new_error ("2201B",
-            "SR098", "regexp error at \'%s\' column %d", pattern, erroff);
+      pcre2_get_error_message(err, error, sizeof (error));
+      err_ret[0] = srv_make_new_error ("2201B",
+          "SR098", "regexp error at \'%s\' column %zu (%s)", pattern, erroff, error);
       return NULL;
     }
-  tmp.code_x = pcre_study (tmp.code, options, &error);
-#ifdef DEBUG
-  if (!tmp.code_x)
-    dbg_printf (("***warning RX100: regexp warning: extra regular expression compiling failed\n"));
-#endif
-  if (!tmp.code_x)
-     {
-       tmp.code_x = pcre_malloc (sizeof (pcre_extra));
-       if (tmp.code_x)
-         memset (tmp.code_x, 0, sizeof (pcre_extra));
-     }
-#ifdef PCRE_EXTRA_MATCH_LIMIT
-  if (c_pcre_match_limit > 0)
-    {
-      tmp.code_x->flags |= PCRE_EXTRA_MATCH_LIMIT;
-      tmp.code_x->match_limit = c_pcre_match_limit;
-    }
-#endif
-#ifdef PCRE_EXTRA_MATCH_LIMIT_RECURSION
-  if (c_pcre_match_limit_recursion > 0)
-    {
-      tmp.code_x->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
-      tmp.code_x->match_limit_recursion = c_pcre_match_limit_recursion;
-    }
-#endif
   key.orig_strg = box_dv_short_string (pattern);
   new_val = (compiled_regexp_t *)dk_alloc (sizeof (compiled_regexp_t));
   new_val->code = tmp.code;
-  new_val->code_x = tmp.code_x;
   new_val->refctr = 1;
   HT_ENTER (c_r);
   pcre_cache_check (c_r);
@@ -302,18 +292,18 @@
 }
 
 
-static int
+static uint32_t
 regexp_optchars_to_bits (const char *strg)
 {
-  int res = 0;
+  uint32_t res = 0;
   const char *tail;
   for (tail = strg; '\0' != tail[0]; tail++)
     {
       switch (tail[0])
         {
-        case 'i': case 'I': res |= PCRE_CASELESS; break;
-        case 'm': case 'M': res |= PCRE_MULTILINE; break;
-        case 's': case 'S': res |= PCRE_DOTALL; break;
+        case 'i': case 'I': res |= PCRE2_CASELESS; break;
+        case 'm': case 'M': res |= PCRE2_MULTILINE; break;
+        case 's': case 'S': res |= PCRE2_DOTALL; break;
 /*
 #define PCRE_EXTENDED           0x0008
 #define PCRE_ANCHORED           0x0010
@@ -324,7 +314,7 @@
 #define PCRE_UNGREEDY           0x0200
 #define PCRE_NOTEMPTY           0x0400
         */
-        case 'u': case 'U': res |= PCRE_UTF8; break;
+        case 'u': case 'U': res |= PCRE2_UTF; break;
         /*
 #define PCRE_NO_AUTO_CAPTURE    0x1000
         */
@@ -338,15 +328,16 @@
 bif_regexp_match (caddr_t * qst, caddr_t * err_ret, state_slot_t ** args)
 {
   int utf8_mode;
-  int str_len;
+  PCRE2_SIZE str_len;
   compiled_regexp_t *cd_info = NULL;
   caddr_t p_to_free = NULL, str_to_free = NULL;
   char *pattern;
   char *str;
-  int c_opts = 0, r_opts = 0;
+  uint32_t c_opts = 0, r_opts = 0;
   caddr_t ret_str = NULL;
   long replace_the_instr = 0;
-  int offvect[NOFFSETS];
+  PCRE2_SIZE *offvect;
+  pcre2_match_data *md = NULL;
   int result;
 
   utf8_mode = 0;
@@ -364,7 +355,7 @@
   if (*err_ret) goto done;
 
   if (utf8_mode)
-    c_opts |= PCRE_UTF8;
+    c_opts |= PCRE2_UTF;
 
   if (!pattern || !str)
     goto done;
@@ -373,10 +364,14 @@
   if (err_ret[0])
     goto done;
 
-  str_len = (int) strlen (str);
-  result = pcre_exec (cd_info->code, cd_info->code_x, str, str_len, 0, r_opts, offvect, NOFFSETS);
+  str_len = strlen (str);
+  create_match_context ();
+  md = pcre2_match_data_create (NOFFSETS, NULL);
+  result = pcre2_match (cd_info->code, (PCRE2_SPTR) str, str_len, 0, r_opts,
+                        md, match_ctxt);
   if (result >= 0)
     {
+      offvect = pcre2_get_ovector_pointer (md);
       ret_str = dk_alloc_box (offvect[1] - offvect[0] + 1, DV_SHORT_STRING);
       strncpy (ret_str, str + offvect[0], offvect[1] - offvect[0]);
       ret_str[offvect[1] - offvect[0]] = 0;
@@ -417,6 +412,7 @@
 
 done:
   release_compiled_regexp (compiled_regexps, cd_info);
+  pcre2_match_data_free (md);
   if (*err_ret)
     dk_free_box (ret_str);
   dk_free_tree (p_to_free);
@@ -428,15 +424,15 @@
 bif_rdf_regex_impl (caddr_t * qst, caddr_t * err_ret, state_slot_t ** args)
 {
   int utf8_mode = 1;
-  int str_len;
+  PCRE2_SIZE str_len;
   compiled_regexp_t *cd_info = NULL;
   caddr_t p_to_free = NULL, str_to_free = NULL;
   char *pattern;
   char *str;
-  int c_opts = 0, r_opts = 0;
+  uint32_t c_opts = 0, r_opts = 0;
   int result = -1;
   caddr_t err = NULL;
-  int offvect[NOFFSETS];
+  pcre2_match_data *md = NULL;
   switch ((BOX_ELEMENTS (args)))
     {
     default:
@@ -449,7 +445,7 @@
   if (err) goto done;
 
   if (utf8_mode)
-    c_opts |= PCRE_UTF8;
+    c_opts |= PCRE2_UTF;
 
   if (!pattern || !str)
     goto done;
@@ -457,11 +453,15 @@
   cd_info = get_compiled_regexp (compiled_regexps, pattern, c_opts, err_ret);
   if (err_ret[0])
     goto done;
-  str_len = (int) strlen (str);
-  result = pcre_exec (cd_info->code, cd_info->code_x, str, str_len, 0, r_opts, offvect, NOFFSETS);
+  str_len = strlen (str);
+  create_match_context ();
+  md = pcre2_match_data_create (NOFFSETS, NULL);
+  result = pcre2_match (cd_info->code, (PCRE2_SPTR) str, str_len, 0, r_opts,
+                        md, match_ctxt);
 
 done:
   release_compiled_regexp (compiled_regexps, cd_info);
+  pcre2_match_data_free (md);
   if (err)
     dk_free_tree (err);
   dk_free_tree (p_to_free);
@@ -477,13 +477,14 @@
   char *pattern;
   char *str;
   int offset;
-  int res_len;
+  PCRE2_SIZE res_len;
   compiled_regexp_t *cd_info = NULL;
-  int c_opts = 0, r_opts = 0;
+  uint32_t c_opts = 0, r_opts = 0;
   caddr_t p_to_free = NULL, str_to_free = NULL;
   caddr_t ret_str = NULL;
   int result;
-  int offvect[NOFFSETS];
+  PCRE2_SIZE *offvect;
+  pcre2_match_data *md = NULL;
 
   utf8_mode = 0;
   pattern = bif_regexp_str_arg (qst, args, 0, "regexp_substr", REGEXP_BF, &utf8_mode, &p_to_free, err_ret);
@@ -504,14 +505,20 @@
   if (NULL != err_ret[0])
     goto done;
 
-  res_len = (int) strlen (str);
-  result = pcre_exec (cd_info->code, cd_info->code_x, str, res_len, 0, r_opts, offvect, NOFFSETS);
+  res_len = strlen (str);
+  create_match_context ();
+  md = pcre2_match_data_create (NOFFSETS, NULL);
+  result = pcre2_match (cd_info->code, (PCRE2_SPTR) str, res_len, 0, r_opts,
+                        md, match_ctxt);
   if (result > 0)
     {
+      PCRE2_UCHAR *substr;
+
+      offvect = pcre2_get_ovector_pointer (md);
       int offs = offset*2, rc;
       int ret_strlen = (offset < result && offset >= 0 ? (offvect[offs+1] - offvect[offs]) : res_len);
       ret_str = dk_alloc_box (ret_strlen + 1, DV_SHORT_STRING);
-      rc = pcre_copy_substring (str, offvect, result, offset, ret_str, res_len + 1);
+      rc = pcre2_substring_get_bynumber (md, offset, &substr, &res_len);
       if (rc < 0)
         {
           *err_ret = srv_make_new_error ("2201B", "SR097",
@@ -522,15 +529,21 @@
         {
           if (utf8_mode)
             {
-              caddr_t wide_ret = box_utf8_as_wide_char (ret_str, NULL, ret_strlen, 0);
+              caddr_t wide_ret = box_utf8_as_wide_char ((ccaddr_t) substr, NULL, ret_strlen, 0);
               dk_free_box (ret_str);
               ret_str = wide_ret;
+              pcre2_substring_free (substr);
+            }
+          else
+            {
+              ret_str = (caddr_t) substr;
             }
         }
     }
 
 done:
   release_compiled_regexp (compiled_regexps, cd_info);
+  pcre2_match_data_free (md);
   if (*err_ret)
     dk_free_box (ret_str);
   dk_free_tree (p_to_free);
@@ -539,7 +552,7 @@
 }
 
 ptrlong *
-regexp_offvect_to_array_of_long (utf8char *str, int *offvect, int result, int utf8_mode)
+regexp_offvect_to_array_of_long (utf8char *str, PCRE2_SIZE *offvect, int result, int utf8_mode)
 {
   int i, idx_to_fill;
   int prev_ofs, ofs, prev_wide_len;
@@ -648,11 +661,13 @@
   int utf8_mode, utf8_mode2;
   char *pattern = NULL;
   char *str = NULL;
-  int offset, str_len;
+  int offset;
+  PCRE2_SIZE str_len;
   compiled_regexp_t *cd_info = NULL;
-  int c_opts = 0, r_opts = 0, max_n_hits = 0x1000000 / sizeof (ptrlong);
+  uint32_t c_opts = 0, r_opts = 0, max_n_hits = 0x1000000 / sizeof (ptrlong);
   caddr_t p_to_free = NULL, str_to_free = NULL;
-  int offvect[NOFFSETS];
+  PCRE2_SIZE *offvect;
+  pcre2_match_data *md = NULL;
   ptrlong *ret_vec = NULL;
   dk_set_t ret_revlist = NULL;
 
@@ -680,13 +695,16 @@
   if (NULL != err_ret[0])
     goto done;
 
-  str_len = (int) strlen (str);
+  str_len = strlen (str);
+  create_match_context ();
+  md = pcre2_match_data_create (NOFFSETS, NULL);
   if (parse_list)
     {
       while (0 < max_n_hits--)
         {
-          int result = pcre_exec (cd_info->code, cd_info->code_x, str, str_len, offset, r_opts,
-            offvect, NOFFSETS);
+          int result = pcre2_match (cd_info->code, (PCRE2_SPTR) str, str_len,
+                                    offset, r_opts, md, match_ctxt);
+          offvect = pcre2_get_ovector_pointer (md);
           if (0 >= result)
             break;
           ret_vec = regexp_offvect_to_array_of_long ((utf8char *)str, offvect, result, utf8_mode);
@@ -699,13 +717,15 @@
     }
   else
     {
-      int result = pcre_exec (cd_info->code, cd_info->code_x, str, str_len, offset, r_opts,
-        offvect, NOFFSETS);
+      int result = pcre2_match (cd_info->code, (PCRE2_SPTR) str, str_len,
+                                offset, r_opts, md, match_ctxt);
+      offvect = pcre2_get_ovector_pointer (md);
       ret_vec = regexp_offvect_to_array_of_long ((utf8char *)str, offvect, result, utf8_mode);
     }
 
 done:
   release_compiled_regexp (compiled_regexps, cd_info);
+  pcre2_match_data_free (md);
   dk_free_tree (p_to_free);
   dk_free_tree (str_to_free);
   if (*err_ret)
@@ -990,7 +1010,10 @@
 static caddr_t
 bif_regexp_version (caddr_t * qst, caddr_t * err_ret, state_slot_t ** args)
 {
-  return box_dv_short_string(pcre_version());
+  char ver[24];
+
+  pcre2_config (PCRE2_CONFIG_VERSION, ver);
+  return box_dv_short_string(ver);
 }
 
 void
@@ -1013,21 +1036,26 @@
 
 /* internal functions for internal usage in Virtuoso */
 caddr_t
-regexp_match_01 (const char* pattern, const char* str, int c_opts)
+regexp_match_01 (const char* pattern, const char* str, uint32_t c_opts)
 {
   compiled_regexp_t *cd_info = NULL;
   int r_opts = 0;
   caddr_t err = NULL;
-  int offvect[NOFFSETS];
+  PCRE2_SIZE *offvect;
+  pcre2_match_data *md;
   int result;
-  int str_len = (int) strlen (str);
+  size_t str_len = strlen (str);
 
   cd_info = get_compiled_regexp (compiled_regexps, pattern, c_opts, &err);
   if (err)
     sqlr_resignal (err);
 
-  memset (offvect, -1, NOFFSETS * sizeof (int));
-  result = pcre_exec (cd_info->code, cd_info->code_x, str, str_len, 0, r_opts, offvect, NOFFSETS);
+  create_match_context ();
+  md = pcre2_match_data_create (NOFFSETS, NULL);
+  result = pcre2_match (cd_info->code, (PCRE2_SPTR) str, str_len, 0, r_opts,
+                        md, match_ctxt);
+  offvect = pcre2_get_ovector_pointer (md);
+  pcre2_match_data_free (md);
   release_compiled_regexp (compiled_regexps, cd_info);
   if (result != -1)
     {
@@ -1041,14 +1069,15 @@
 
 
 caddr_t
-regexp_match_01_const (const char* pattern, const char* str, int c_opts, void** ret)
+regexp_match_01_const (const char* pattern, const char* str, uint32_t c_opts, void** ret)
 {
   compiled_regexp_t *cd_info = ((compiled_regexp_t **)ret)[0];
-  int r_opts = 0;
+  uint32_t r_opts = 0;
   caddr_t err = NULL;
-  int offvect[NOFFSETS];
+  PCRE2_SIZE *offvect;
+  pcre2_match_data *md;
   int result;
-  int str_len = (int) strlen (str);
+  PCRE2_SIZE str_len = strlen (str);
   if (NULL == cd_info)
     {
       cd_info = get_compiled_regexp (compiled_regexps, pattern, c_opts, &err);
@@ -1056,8 +1085,12 @@
         sqlr_resignal (err);
       ret[0] = cd_info;
     }
-  memset (offvect, -1, NOFFSETS * sizeof (int));
-  result = pcre_exec (cd_info->code, cd_info->code_x, str, str_len, 0, r_opts, offvect, NOFFSETS);
+  create_match_context ();
+  md = pcre2_match_data_create (NOFFSETS, NULL);
+  result = pcre2_match (cd_info->code, (PCRE2_SPTR) str, str_len, 0, r_opts,
+                        md, match_ctxt);
+  offvect = pcre2_get_ovector_pointer (md);
+  pcre2_match_data_free (md);
   if (result != -1)
     return box_dv_short_nchars (str + offvect[0], offvect[1] - offvect[0]);
   return NULL;
@@ -1068,21 +1101,21 @@
   char	mc;
   int	opt;
 } regexp_mode_table[] = {
-  { 'i',	PCRE_CASELESS },
-  { 'm',	PCRE_MULTILINE },
-  { 's',	PCRE_DOTALL },
-  { 'x',	PCRE_EXTENDED }
+  { 'i',	PCRE2_CASELESS },
+  { 'm',	PCRE2_MULTILINE },
+  { 's',	PCRE2_DOTALL },
+  { 'x',	PCRE2_EXTENDED }
 };
 
 #define regexp_mode_table_l (sizeof(regexp_mode_table)/sizeof(struct regexp_opts_s))
 
 
 
-int
+uint32_t
 regexp_make_opts (const char* mode)
 {
   const char* mode_char = mode;
-  int c_opts = 0;
+  uint32_t c_opts = 0;
   if (!mode)
     return 0;
   while (mode_char[0])
@@ -1117,18 +1150,24 @@
 */
 
 int
-regexp_split_parse (const char* pattern, const char* str, int* offvect, int offvect_sz, int c_opts)
+regexp_split_parse (const char* pattern, const char* str, PCRE2_SIZE* offvect, uint32_t offvect_sz, uint32_t c_opts)
 {
-  int str_len;
-  int r_opts = 0;
+  PCRE2_SIZE str_len;
+  uint32_t r_opts = 0;
   int result;
   caddr_t err = NULL;
+  pcre2_match_data *md;
   compiled_regexp_t *cd_info = get_compiled_regexp (compiled_regexps, pattern, c_opts, &err);
   if (err)
     sqlr_resignal (err);
 
-  str_len = (int) strlen (str);
-  result = pcre_exec (cd_info->code, cd_info->code_x, str, str_len, 0, r_opts, offvect, offvect_sz);
+  str_len = strlen (str);
+  create_match_context ();
+  md = pcre2_match_data_create (offvect_sz, NULL);
+  result = pcre2_match (cd_info->code, (PCRE2_SPTR) str, str_len, 0, r_opts,
+                        md, match_ctxt);
+  offvect = pcre2_get_ovector_pointer (md);
+  pcre2_match_data_free (md);
   release_compiled_regexp (compiled_regexps, cd_info);
   return result;
 }
@@ -1142,11 +1181,12 @@
 */
 
 caddr_t
-regexp_split_match (const char* pattern, const char* str, int* next, int c_opts)
+regexp_split_match (const char* pattern, const char* str, int* next, uint32_t c_opts)
 {
-  int str_len;
-  int r_opts = 0;
-  int offvect[NOFFSETS];
+  PCRE2_SIZE str_len;
+  uint32_t r_opts = 0;
+  PCRE2_SIZE *offvect;
+  pcre2_match_data *md;
   int result;
   caddr_t ret_str;
   caddr_t err = NULL;
@@ -1154,15 +1194,19 @@
   if (err)
     sqlr_resignal (err);
 
-  str_len = (int) strlen (str);
-  result = pcre_exec (cd_info->code, cd_info->code_x, str, str_len, 0, r_opts, offvect, NOFFSETS);
+  str_len = strlen (str);
+  create_match_context ();
+  md = pcre2_match_data_create (NOFFSETS, NULL);
+  result = pcre2_match (cd_info->code, (PCRE2_SPTR) str, str_len, 0, r_opts,
+                        md, match_ctxt);
+  offvect = pcre2_get_ovector_pointer (md);
   if (result != -1)
     {
       ret_str = dk_alloc_box (offvect[0] + 1, DV_STRING);
       strncpy (ret_str, str, offvect[0]);
       ret_str[offvect[0]] = 0;
       if (next)
-        next[0] = offvect[1];
+        next[0] = (int) offvect[1];
     }
   else
     {
@@ -1171,5 +1215,6 @@
         next[0] = -1;
     }
   release_compiled_regexp (compiled_regexps, cd_info);
+  pcre2_match_data_free (md);
   return ret_str;
 }
--- virtuoso-opensource.orig/libsrc/Wi/srvstat.c
+++ virtuoso-opensource/libsrc/Wi/srvstat.c
@@ -414,8 +414,8 @@
 extern int dbf_assert_on_malformed_data;
 extern int dbf_max_itc_samples;
 
-extern int32 c_pcre_match_limit;
-extern int32 c_pcre_match_limit_recursion;
+extern uint32 c_pcre_match_limit;
+extern uint32 c_pcre_match_limit_recursion;
 extern int32 pcre_max_cache_sz;
 
 extern int32 shcompo_max_cache_sz;
--- virtuoso-opensource.orig/libsrc/Wi/sqlbif.h
+++ virtuoso-opensource/libsrc/Wi/sqlbif.h
@@ -314,11 +314,11 @@
 extern caddr_t file_native_name_from_iri_path_nchars (const char *iri_path, size_t iri_path_len);
 caddr_t get_ssl_error_text (char *buf, int len);
 
-caddr_t regexp_match_01 (const char *pattern, const char *str, int c_opts);
-caddr_t regexp_match_01_const (const char* pattern, const char* str, int c_opts, void ** compiled_ret);
-caddr_t regexp_split_match (const char* pattern, const char* str, int* next, int c_opts);
-int regexp_make_opts (const char* mode);
-int regexp_split_parse (const char* pattern, const char* str, int* offvect, int offvect_sz, int c_opts);
+caddr_t regexp_match_01 (const char *pattern, const char *str, uint32 c_opts);
+caddr_t regexp_match_01_const (const char* pattern, const char* str, uint32_t c_opts, void ** compiled_ret);
+caddr_t regexp_split_match (const char* pattern, const char* str, int* next, uint32_t c_opts);
+uint32_t regexp_make_opts (const char* mode);
+int regexp_split_parse (const char* pattern, const char* str, size_t* offvect, uint32_t offvect_sz, uint32_t c_opts);
 
 /*! Wrapper for uu_decode_part,
  modifies \c src input string! */
--- virtuoso-opensource.orig/libsrc/Wi/xqf.c
+++ virtuoso-opensource/libsrc/Wi/xqf.c
@@ -41,7 +41,8 @@
 
 // Debian maintainer: replaced by external PCRE
 // #include "util/pcrelib/pcre.h"
-#include "pcre.h"
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
 
 #define ecm_isname(c) \
   ( ((c) & ~0xFF) ? (ecm_utf8props[(c)] & ECM_ISNAME) : \
@@ -1540,7 +1541,7 @@
 
 
 static void
-xqf_check_regexp (caddr_t pattern, int c_opts)
+xqf_check_regexp (caddr_t pattern, uint32_t c_opts)
 {
   caddr_t pre_res;
   int next;
@@ -1552,13 +1553,13 @@
     sqlr_new_error ("42001", "XRQ??", "invalid regular expression");
 }
 
-static int
+static uint32_t
 xqf_make_regexp_modes(const char * flag)
 {
-  int c_opts;
+  uint32_t c_opts;
   if ((c_opts=regexp_make_opts (flag)) == -1)
     sqlr_new_error ("42001", "XRQ??", "invalid regular expression flag");
-  c_opts |= PCRE_UTF8;
+  c_opts |= PCRE2_UTF;
   return c_opts;
 }
 
@@ -1570,7 +1571,7 @@
   int next = 1;
   caddr_t str_inx = str;
   dk_set_t res_set = 0;
-  int c_opts;
+  uint32_t c_opts;
 
   c_opts=xqf_make_regexp_modes (flag);
   xqf_check_regexp (pattern, c_opts);
@@ -1628,7 +1629,7 @@
 xqf_matches (xp_instance_t * xqi, XT * tree, xml_entity_t * ctx_xe)
 {
   caddr_t val1, val2, val3 = NULL;
-  int c_opts;
+  uint32_t c_opts;
 
   if (tree->_.xp_func.argcount)
     {
@@ -2003,7 +2004,7 @@
 #define XQF_REPL_OK	0
 
 static int
-xqf_write_replacement (dk_session_t * ses, caddr_t input, int * offvect, int offvect_sz, caddr_t replacement)
+xqf_write_replacement (dk_session_t * ses, caddr_t input, size_t * offvect, int offvect_sz, caddr_t replacement)
 {
   int repl_sz = box_length (replacement) - 1;
   int idx = 0;
@@ -2042,7 +2043,7 @@
   caddr_t pattern = xpf_arg (xqi, tree, ctx_xe, DV_STRING, 1);
   caddr_t replacement = xpf_arg (xqi, tree, ctx_xe, DV_STRING, 2);
   caddr_t flag = 0;
-  int c_opts;
+  uint32_t c_opts;
   if (tree->_.xp_func.argcount > 3)
     flag = xpf_arg (xqi, tree, ctx_xe, DV_STRING, 3);
 
@@ -2050,7 +2051,7 @@
   xqf_check_regexp (pattern, c_opts);
 
   {
-    int offvect[128];
+    size_t offvect[128];
     int res = regexp_split_parse (pattern, input, offvect, 128, c_opts);
     int utf8_str_len = box_length (input) - 1;
     if (res != -1)
--- virtuoso-opensource.orig/libsrc/Xml.new/datatypes.c
+++ virtuoso-opensource/libsrc/Xml.new/datatypes.c
@@ -25,7 +25,8 @@
 
 #include "xmlparser_impl.h"
 #include "schema.h"
-#include "pcre.h"
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
 
 ptrlong
 xs_get_primitive_typeidx (vxml_parser_t * parser, xs_component_t *type)
@@ -48,7 +49,7 @@
   return (ptrlong)base_type;
 }
 
-extern caddr_t regexp_match_01 (const char* pattern, const char* str, int c_opts);
+extern caddr_t regexp_match_01 (const char* pattern, const char* str, uint32_t c_opts);
 
 int
 xs_check_type_compliance (vxml_parser_t * parser, xs_component_t *type,
@@ -69,7 +70,7 @@
   if (NULL != basetype_regexp)
     {
       int match_len = 1;
-      caddr_t match = regexp_match_01 (basetype_regexp, (const char *) value, PCRE_UTF8);
+      caddr_t match = regexp_match_01 (basetype_regexp, (const char *) value, PCRE2_UTF);
       if (match)
 	{
 	  match_len = box_length (match);
--- virtuoso-opensource.orig/binsrc/virtuoso/Makefile.am
+++ virtuoso-opensource/binsrc/virtuoso/Makefile.am
@@ -39,7 +39,7 @@
     VIRTUOSO_BIN=virtuoso-t
 endif
 
-LIBS		+= @ZLIB_LIB@ -lminizip -lpcre
+LIBS		+= @ZLIB_LIB@ -lminizip -lpcre2-8
 
 lib_LTLIBRARIES = libvirtuoso-t.la $(IODBC_LIBS) $(MONO_LIBS) $(IODBC_MONO_LIBS)
 bin_PROGRAMS = virtuoso-t $(IODBC_PROGS)
--- virtuoso-opensource.orig/binsrc/tests/Makefile.am
+++ virtuoso-opensource/binsrc/tests/Makefile.am
@@ -52,7 +52,7 @@
 	$(top_builddir)/libsrc/Tidy/libtidy.la \
 	$(top_builddir)/libsrc/util/libutil.la \
 	@srvrlibs@ \
-	-lpcre -lminizip
+	-lpcre2-8 -lminizip
 
 client_libs = \
 	$(top_builddir)/libsrc/Wi/libwic.la \
@@ -60,7 +60,7 @@
 	$(top_builddir)/libsrc/Thread/libthrs.la \
 	$(top_builddir)/libsrc/util/libutil.la \
 	@clntlibs@ \
-	-lpcre -lminizip
+	-lpcre2-8 -lminizip
 
 M2_SOURCES = chil.c
 M2_LDADD = \


More information about the debian-science-maintainers mailing list