[med-svn] [Git][med-team/phast][master] Attempt to port to pcre2

Andreas Tille (@tille) gitlab at salsa.debian.org
Thu Nov 18 18:15:12 GMT 2021



Andreas Tille pushed to branch master at Debian Med / phast


Commits:
a4cc806f by Andreas Tille at 2021-11-18T19:14:48+01:00
Attempt to port to pcre2

- - - - -


3 changed files:

- debian/changelog
- + debian/patches/pcre2.patch
- debian/patches/series


Changes:

=====================================
debian/changelog
=====================================
@@ -1,3 +1,9 @@
+phast (1.6+dfsg-3) UNRELEASED; urgency=medium
+
+  * Port to pcre2
+
+ -- Andreas Tille <tille at debian.org>  Thu, 18 Nov 2021 17:43:05 +0100
+
 phast (1.6+dfsg-2) unstable; urgency=medium
 
   * Build-Depends: s/libpcre3-dev/libpcre2-dev/


=====================================
debian/patches/pcre2.patch
=====================================
@@ -0,0 +1,376 @@
+--- a/include/phast/stringsplus.h
++++ b/include/phast/stringsplus.h
+@@ -24,7 +24,8 @@
+ #ifndef STRINGSPLUS_H
+ #define STRINGSPLUS_H
+ 
+-#include <pcre.h>
++#define PCRE2_CODE_UNIT_WIDTH 8
++#include <pcre2.h>
+ #include "phast/lists.h"
+ #include "stdio.h"
+ 
+@@ -56,9 +57,6 @@ typedef struct {
+   int nchars;			/**< Number of bytes currently allocated */
+ } String;
+ 
+-/** PCRE is another name for Regex */
+-typedef pcre Regex;
+-				
+ /** \name String Allocate/Cleanup functions 
+ \{ */
+ 
+@@ -99,7 +97,7 @@ void str_clear(String *s);
+ 
+ /** \} */
+ 
+-/* void str_match(String *s, Regexp *r); */ /* tags? */
++/* void str_match(String *s, pcre2_compile_context *r); */ /* tags? */
+ 
+ /** \name String Append functions */
+ 
+@@ -385,13 +383,13 @@ int str_split(String *s, const char* del
+    expression syntax.
+    @result Newly allocated and compiled Regex object.
+  */
+-Regex *str_re_new(const char *re_str);
++pcre2_code *str_re_new(const unsigned char *re_str);
+ 
+ /** Free resources associated with regular expression object. 
+     @param re Regex object to free
+     @note The object itself is freed also. 
+ */
+-void str_re_free(Regex *re);
++void str_re_free(pcre2_compile_context *re);
+ 
+ /** Test whether the specified string matches the specified regex.
+    @pre The list 'l' must be initialized externally if non-NULL.  
+@@ -403,11 +401,11 @@ void str_re_free(Regex *re);
+    on no match, and -2 on error.
+    @note NULLs will be added for all non-matching groups in list 'l'
+    @note In the list 'l', the 0th substring corresponds to the entire regex. 
+-   @note This function uses the pcre_exec function of the PCRE
++   @note This function uses the pcre2_exec function of the PCRE2
+    regex package.
+    @warning Substrings added to List l are newly allocated and must be
+    freed externally. */
+-int str_re_match(String *s, Regex *re, List *l, int nsubexp);
++int str_re_match(String *s, pcre2_compile_context *re, List *l, int nsubexp);
+ 
+ /** Search the specified string for the first instance of the specified
+    regex.  
+@@ -415,14 +413,14 @@ int str_re_match(String *s, Regex *re, L
+    @param start_offset The first start_offset characters will be ignored.
+    @param l (Optional) If non-NULL, it will be populated with substrings corresponding
+    to subexpressions, as described under str_re_match.  
+-   @note This function uses the pcre_exec function of the PCRE regex package.
++   @note This function uses the pcre2_exec function of the PCRE2 regex package.
+    @result Index of first match, -1 if no match exists, or -2 if an
+    internal error occurs. 
+    @warning Substrings added to List l are newly allocated and must be
+    freed externally. 
+    @see str_re_match
+ */
+-int str_re_search(String *s, Regex *re, int start_offset, List *l, 
++int str_re_search(String *s, pcre2_compile_context *re, int start_offset, List *l, 
+                   int nsubexp);
+ 
+ /** \} */
+--- a/src/lib/base/phast_stringsplus.c
++++ b/src/lib/base/phast_stringsplus.c
+@@ -12,7 +12,8 @@
+    
+    $Id: stringsplus.c,v 1.12 2009-02-19 23:33:48 agd27 Exp $ */
+ 
+-#include <pcre.h>
++#define PCRE2_CODE_UNIT_WIDTH 8
++#include <pcre2.h>
+ #include "phast/stringsplus.h"
+ #include "phast/misc.h"
+ #include <stdlib.h>
+@@ -462,29 +463,29 @@ int str_ends_with_charstr(String *s, con
+   return (strncmp(&s->chars[s->length - len], substr, len) == 0);
+ }
+ 
+-Regex *str_re_new(const char *re_str) {
+-  Regex *re;
+-  const char *errstr;
+-  int erroffset;
++pcre2_code *str_re_new(const unsigned char *re_str) {
++  pcre2_code *re;
++  int errorcode;
++  PCRE2_SIZE erroffset;
+ 
+-  re = pcre_compile(re_str, 0, &errstr, &erroffset, NULL);
++  re = pcre2_compile(re_str, PCRE2_ZERO_TERMINATED, 0, &errorcode, &erroffset, NULL);
+   if (re == NULL) {
+-    die("ERROR: cannot compile regular expression '%s' (%d): %s\n",
+-	re_str, erroffset, errstr);
++    die("ERROR: cannot compile regular expression '%s' (%d): %d\n",
++	re_str, erroffset, errorcode);
+   }
+   return re;
+ }
+ 
+ 
+-//NOTE Regex are allocated by pcre; do not use sfree
+-void str_re_free(Regex *re) {
++//NOTE Regex are allocated by pcre2; do not use sfree
++void str_re_free(pcre2_compile_context *re) {
+   if (re != NULL)
+     free(re);
+ }
+ 
+ 
+ #define OVECCOUNT 300
+-int str_re_match_sub(String *s, Regex *re, List *l, int offset, int nsubexp, 
++int str_re_match_sub(String *s, pcre2_compile_context *re, List *l, int offset, int nsubexp, 
+ 		     int *first_match) {
+   int i, len, rc, ovector[OVECCOUNT], rv;
+   String *substr;
+@@ -494,15 +495,15 @@ int str_re_match_sub(String *s, Regex *r
+      used or there will be a memory leak! */
+   if (l != NULL) lst_clear(l);
+ 
+-  rc = pcre_exec(re, NULL, s->chars, s->length, offset, 0, ovector, OVECCOUNT);
+-  if (rc == PCRE_ERROR_NOMATCH) return -1;
++  rc = pcre2_exec(re, NULL, s->chars, s->length, offset, 0, ovector, OVECCOUNT);
++  if (rc == PCRE2_ERROR_NOMATCH) return -1;
+   if (rc < 0) return -2;  //any other error
+   if (first_match != NULL) (*first_match) = ovector[0];
+   rv = ovector[1]-ovector[0];
+   if (rc >= 0 && l != NULL) {
+     if (rc == 0) {
+       printf("nsubexp=%i rc=%i\n", nsubexp, rc);
+-      fprintf(stderr, "Warning: pcre_exec only has room for %d captured substrings.  May need to increase OVECCOUNT and re-compile\n", OVECCOUNT/3);
++      fprintf(stderr, "Warning: pcre2_exec only has room for %d captured substrings.  May need to increase OVECCOUNT and re-compile\n", OVECCOUNT/3);
+       rc = OVECCOUNT/3;
+     }
+     for (i = 0; i < rc && i <= nsubexp; i++) {
+@@ -523,11 +524,11 @@ int str_re_match_sub(String *s, Regex *r
+ }
+ 
+ 
+-int str_re_match(String *s, Regex *re, List *l, int nsubexp) {
++int str_re_match(String *s, pcre2_compile_context *re, List *l, int nsubexp) {
+   return str_re_match_sub(s, re, l, 0, nsubexp, NULL);
+ }
+ 
+-int str_re_search(String *s, Regex *re, int start_offset, List *l,
++int str_re_search(String *s, pcre2_compile_context *re, int start_offset, List *l,
+ 		  int nsubexp) {
+   int first_match_idx, rc;
+   rc = str_re_match_sub(s, re, l, start_offset, nsubexp, &first_match_idx);
+--- a/src/make-include.mk
++++ b/src/make-include.mk
+@@ -137,7 +137,7 @@ LIBS = -lphast -framework Accelerate -lc
+ else
+ ifdef CLAPACKPATH
+ ifneq ($(TARGETOS), Windows)
+-  LIBS = -lphast -llapack -ltmglib -lblas -lc -lm -lpcre $(LDFLAGS)
++  LIBS = -lphast -llapack -ltmglib -lblas -lc -lm -lpcre2-8 $(LDFLAGS)
+ else
+   CFLAGS += -I${CLAPACKPATH}/INCLUDE -I${F2CPATH} -DPCRE_STATIC
+   LIBS = -lphast -lm  ${CLAPACKPATH}/liblapack.a ${CLAPACKPATH}/libf2c.a ${CLAPACKPATH}/libblas.a
+--- a/src/dless/dlessP.c
++++ b/src/dless/dlessP.c
+@@ -196,7 +196,7 @@ void do_p_values(BDPhyloHmm *bdphmm, GFF
+   JumpProcess *jp;
+   List *types = lst_new_ptr(nnodes * 2), *type_lists = lst_new_ptr(nnodes * 2);
+   TreeModel *mod = bdphmm->phmm->mods[0]; /* nonconserved */
+-  Regex *id_re = str_re_new(".*id \"([^\"]*)\"");
++  pcre2_compile_context *id_re = str_re_new(".*id \"([^\"]*)\"");
+   String *id = str_new(STR_SHORT_LEN);
+   List *l = lst_new_ptr(1);
+ 
+--- a/src/lib/base/phast_misc.c
++++ b/src/lib/base/phast_misc.c
+@@ -669,7 +669,7 @@ int draw_index(double *p, int size) {
+    character as well as "->" to indicate mapping.  */
+ struct hash_table *make_name_hash(char *mapstr) {
+   Hashtable *retval = hsh_new(20);
+-  Regex *map_re = str_re_new("^[[:space:]]*([A-Za-z0-9_]+)[[:space:]]*(->|=)[[:space:]]*([A-Za-z0-9_]+)[[:space:]]*");
++  pcre2_compile_context *map_re = str_re_new("^[[:space:]]*([A-Za-z0-9_]+)[[:space:]]*(->|=)[[:space:]]*([A-Za-z0-9_]+)[[:space:]]*");
+   List *mappings = lst_new_ptr(20), *names = lst_new_ptr(3);
+   String *s = str_new_charstr(mapstr);
+   int i;
+--- a/src/lib/feature/phast_bed.c
++++ b/src/lib/feature/phast_bed.c
+@@ -140,7 +140,7 @@ void gff_print_bed(FILE *OUTF,  GFF_Set
+   if (lst_size(gff->features) == 0) return; /* now can assume at least one feature */
+ 
+   if (!use_groups) {
+-    Regex *tag_val_re = str_re_new("[[:alnum:]_.]+[[:space:]]+(\"[^\"]*\"|[^[:space:]]+)");
++    pcre2_compile_context *tag_val_re = str_re_new("[[:alnum:]_.]+[[:space:]]+(\"[^\"]*\"|[^[:space:]]+)");
+     List *l = lst_new_ptr(2);
+     int ncols = 4;
+ 
+--- a/src/lib/feature/phast_category_map.c
++++ b/src/lib/feature/phast_category_map.c
+@@ -26,11 +26,11 @@ CategoryMap *cm_read(FILE *F) {
+   int cat, cat2, lineno, i, cm_read_error;
+   CategoryMap *cm = NULL;
+   CategoryRange *existing_range;
+-  static Regex *cat_range_re = NULL;
+-  static Regex *ncats_re = NULL;
+-  static Regex *fill_re = NULL;
+-  static Regex *label_re = NULL;
+-  static Regex *extend_re = NULL;
++  static pcre2_compile_context *cat_range_re = NULL;
++  static pcre2_compile_context *ncats_re = NULL;
++  static pcre2_compile_context *fill_re = NULL;
++  static pcre2_compile_context *label_re = NULL;
++  static pcre2_compile_context *extend_re = NULL;
+   int has_dependencies = 0;
+ 
+   line = str_new(STR_SHORT_LEN);
+--- a/src/lib/feature/phast_gff.c
++++ b/src/lib/feature/phast_gff.c
+@@ -38,7 +38,7 @@ GFF_Set* gff_read_set(FILE *F) {
+   GFF_Feature *feat;
+   GFF_Set *set;
+   List *l, *substrs;
+-  static Regex *spec_comment_re = NULL;
++  static pcre2_compile_context *spec_comment_re = NULL;
+ 
+   line = str_new(STR_LONG_LEN);
+   set = gff_new_set();
+@@ -267,7 +267,7 @@ GFF_Feature *gff_new_feature_genomic_pos
+                                          int score_is_null) {
+   GFF_Feature *retval = NULL;
+   List *substrs = lst_new_ptr(4);
+-  static Regex *posre = NULL;
++  static pcre2_compile_context *posre = NULL;
+   if (posre == NULL)
+     posre = str_re_new("(chr[_a-zA-Z0-9]+):([0-9]+)-([0-9]+)([-+])?");
+ 
+@@ -667,7 +667,7 @@ void gff_sort_within_groups(GFF_Set *set
+     undefined values will be placed in a single group. */
+ void gff_group(GFF_Set *set, char *tag) {
+   char *tmpstr=smalloc((100+strlen(tag))*sizeof(char));
+-  Regex *tag_re;
++  pcre2_compile_context *tag_re;
+   List *l = lst_new_ptr(1);
+   int est_no_groups = max(lst_size(set->features) / 10, 1);
+   Hashtable *hash = hsh_new(est_no_groups);
+--- a/src/lib/motif/phast_tfbs.c
++++ b/src/lib/motif/phast_tfbs.c
+@@ -137,8 +137,8 @@ List *pwm_read(const char *filename) {
+   List *l = lst_new_ptr(3);
+   List *probabilitiesStr = lst_new_ptr(4);
+   List *probabilitiesDbl;
+-  Regex *pssm_re = NULL;
+-  Regex *motif_name_re = NULL;
++  pcre2_compile_context *pssm_re = NULL;
++  pcre2_compile_context *motif_name_re = NULL;
+   int alphabetLength;
+ 
+   result = lst_new_ptr(1);
+@@ -215,7 +215,7 @@ int ms_alph_has_lowercase(MS *ms) {
+ MS *ms_read(const char *filename, const char *alphabet) {
+   List *names = lst_new_ptr(10);
+   List *seqs = lst_new_ptr(10);
+-  static Regex *descrip_re = NULL;
++  static pcre2_compile_context *descrip_re = NULL;
+   int i, nseqs, j, do_toupper, line_no;
+   String *line = str_new(STR_MED_LEN);
+   List *l = lst_new_ptr(2);
+--- a/src/lib/msa/phast_local_alignment.c
++++ b/src/lib/msa/phast_local_alignment.c
+@@ -47,7 +47,7 @@ LocalPwAlignment *la_read_lav(FILE *F, i
+   int line_no=0;
+   LocalPwAlignment *lpwa = la_new();
+   List *fields = lst_new_ptr(6);
+-  Regex *stanza_start_re = str_re_new("^([dshaxm])[[:space:]]*{");
++  pcre2_compile_context *stanza_start_re = str_re_new("^([dshaxm])[[:space:]]*{");
+   AlignmentBlock *aln_block = NULL;
+   char stanza_type = '\0';
+   int i;
+--- a/src/lib/msa/phast_msa.c
++++ b/src/lib/msa/phast_msa.c
+@@ -253,7 +253,7 @@ MSA *msa_create_copy(MSA *msa, int suff_
+ MSA *msa_read_fasta(FILE *F, char *alphabet) {
+   List *names = lst_new_ptr(10);
+   List *seqs = lst_new_ptr(10);
+-  static Regex *descrip_re = NULL;
++  static pcre2_compile_context *descrip_re = NULL;
+   int maxlen, i, nseqs, j, do_toupper, line_no;
+   String *line = str_new(STR_MED_LEN);
+   List *l = lst_new_ptr(2);
+@@ -1921,7 +1921,7 @@ GFF_Set *msa_get_informative_feats(MSA *
+ 
+ /* read and return a single sequence from a FASTA file */
+ String *msa_read_seq_fasta(FILE *F) {
+-  static Regex *descrip_re = NULL;
++  static pcre2_compile_context *descrip_re = NULL;
+   String *line = str_new(STR_MED_LEN);
+   String *seq = NULL;
+ 
+@@ -2581,7 +2581,7 @@ msa_format_type msa_format_for_content(F
+   msa_format_type retval = UNKNOWN_FORMAT;
+   String *line = str_new(STR_MED_LEN);
+   List *matches = lst_new_ptr(3);
+-  Regex *ss_re, *phylip_re, *fasta_re, *lav_re, *maf_re;  
++  pcre2_compile_context *ss_re, *phylip_re, *fasta_re, *lav_re, *maf_re;  
+   
+   //using peek instead of read as we don't want to affect file/stream position
+   str_peek_next_line(line, F);
+--- a/src/lib/msa/phast_multi_msa.c
++++ b/src/lib/msa/phast_multi_msa.c
+@@ -51,9 +51,9 @@
+     abort if the sequence contains a character not in the alphabet. */
+ Multi_MSA *multimsa_new_from_files(FILE *F) {
+ 
+-  Regex *blocks_re = str_re_new("#[[:space:]]*BLOCKS[[:space:]]*=[[:space:]]*([0-9]+)");
+-  Regex *alph_re = str_re_new("#[[:space:]]*ALPHABET[[:space:]]*=[[:space:]]*([A-Z]+)");
+-  Regex *format_re = str_re_new("#[[:space:]]*FORMAT[[:space:]]*=[[:space:]]*([A-Z]+)");
++  pcre2_compile_context *blocks_re = str_re_new("#[[:space:]]*BLOCKS[[:space:]]*=[[:space:]]*([0-9]+)");
++  pcre2_compile_context *alph_re = str_re_new("#[[:space:]]*ALPHABET[[:space:]]*=[[:space:]]*([A-Z]+)");
++  pcre2_compile_context *format_re = str_re_new("#[[:space:]]*FORMAT[[:space:]]*=[[:space:]]*([A-Z]+)");
+   
+   int i, num_msa, line_no=0;
+   char *msa_fname;
+--- a/src/lib/msa/phast_sufficient_stats.c
++++ b/src/lib/msa/phast_sufficient_stats.c
+@@ -649,7 +649,7 @@ void ss_write(MSA *msa, FILE *F, int sho
+ /* make reading order optional?  alphabet argument overrides alphabet
+    in file (use NULL to use version in file) */
+ MSA* ss_read(FILE *F, char *alphabet) {
+-  Regex *nseqs_re, *length_re, *tuple_size_re, *ntuples_re, *tuple_re, 
++  pcre2_compile_context *nseqs_re, *length_re, *tuple_size_re, *ntuples_re, *tuple_re, 
+     *names_re, *alph_re, *ncats_re, *order_re, *offset_re;
+   String *line, *alph = NULL;
+   int nseqs, length, tuple_size, ntuples, i, ncats = -99, header_done = 0, 
+--- a/src/lib/phylo/phast_phylo_p_print.c
++++ b/src/lib/phylo/phast_phylo_p_print.c
+@@ -749,7 +749,7 @@ void print_feats_generic(FILE *outfile,
+   String *name;
+   va_list ap;
+   double *data[ncols+1];
+-  Regex *tag_val_re = str_re_new("[[:alnum:]_.]+[[:space:]]+(\"[^\"]*\"|[^[:space:]]+)");
++  pcre2_compile_context *tag_val_re = str_re_new("[[:alnum:]_.]+[[:space:]]+(\"[^\"]*\"|[^[:space:]]+)");
+   List *l = lst_new_ptr(2);
+   char **colname;
+   List **resultList=NULL;
+--- a/src/prequel/phast_pbs_code.c
++++ b/src/prequel/phast_pbs_code.c
+@@ -85,7 +85,7 @@ void pbs_free(PbsCode *code) {
+ }
+ 
+ PbsCode *pbs_new_from_file(FILE *F) {
+-  Regex *nrows_re = str_re_new("##NROWS[[:space:]]*=[[:space:]]*([0-9]+)"),
++  pcre2_compile_context *nrows_re = str_re_new("##NROWS[[:space:]]*=[[:space:]]*([0-9]+)"),
+     *dimension_re = str_re_new("##DIMENSION[[:space:]]*=[[:space:]]*([0-9]+)"),
+     *nbytes_re = str_re_new("##NBYTES[[:space:]]*=[[:space:]]*([0-9]+)"),
+     *codesize_re = str_re_new("##CODESIZE[[:space:]]*=[[:space:]]*([0-9]+)");
+--- a/src/util/msa_view.c
++++ b/src/util/msa_view.c
+@@ -358,7 +358,7 @@ OPTIONS:\n\
+ 
+ void fill_with_Ns(MSA *msa, List *fill_N_list, msa_coord_map *map) {
+   int i, j, nseq, nstart, nend;
+-  Regex* fill_N_re = str_re_new("([[:digit:]]+):([[:digit:]]+)-([[:digit:]]+)");
++  pcre2_compile_context* fill_N_re = str_re_new("([[:digit:]]+):([[:digit:]]+)-([[:digit:]]+)");
+   List *word_list = lst_new_ptr(4);
+   for (i = 0; i < lst_size(fill_N_list); i++) {
+     String *s = lst_get_ptr(fill_N_list, i);


=====================================
debian/patches/series
=====================================
@@ -4,3 +4,4 @@ use_debian_packaged_libpcre.patch
 use_debian_packaged_help2man.patch
 hardening.patch
 fix_test_makefile.patch
+pcre2.patch



View it on GitLab: https://salsa.debian.org/med-team/phast/-/commit/a4cc806f7b1ba38d10769db7f74f57d030e2b7fc

-- 
View it on GitLab: https://salsa.debian.org/med-team/phast/-/commit/a4cc806f7b1ba38d10769db7f74f57d030e2b7fc
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20211118/eaf5a7f6/attachment-0001.htm>


More information about the debian-med-commit mailing list