[med-svn] [Git][med-team/phast][master] Attempt to port to pcre2
Andreas Tille (@tille)
gitlab at salsa.debian.org
Thu Nov 18 18:15:12 GMT 2021
Andreas Tille pushed to branch master at Debian Med / phast
Commits:
a4cc806f by Andreas Tille at 2021-11-18T19:14:48+01:00
Attempt to port to pcre2
- - - - -
3 changed files:
- debian/changelog
- + debian/patches/pcre2.patch
- debian/patches/series
Changes:
=====================================
debian/changelog
=====================================
@@ -1,3 +1,9 @@
+phast (1.6+dfsg-3) UNRELEASED; urgency=medium
+
+ * Port to pcre2
+
+ -- Andreas Tille <tille at debian.org> Thu, 18 Nov 2021 17:43:05 +0100
+
phast (1.6+dfsg-2) unstable; urgency=medium
* Build-Depends: s/libpcre3-dev/libpcre2-dev/
=====================================
debian/patches/pcre2.patch
=====================================
@@ -0,0 +1,376 @@
+--- a/include/phast/stringsplus.h
++++ b/include/phast/stringsplus.h
+@@ -24,7 +24,8 @@
+ #ifndef STRINGSPLUS_H
+ #define STRINGSPLUS_H
+
+-#include <pcre.h>
++#define PCRE2_CODE_UNIT_WIDTH 8
++#include <pcre2.h>
+ #include "phast/lists.h"
+ #include "stdio.h"
+
+@@ -56,9 +57,6 @@ typedef struct {
+ int nchars; /**< Number of bytes currently allocated */
+ } String;
+
+-/** PCRE is another name for Regex */
+-typedef pcre Regex;
+-
+ /** \name String Allocate/Cleanup functions
+ \{ */
+
+@@ -99,7 +97,7 @@ void str_clear(String *s);
+
+ /** \} */
+
+-/* void str_match(String *s, Regexp *r); */ /* tags? */
++/* void str_match(String *s, pcre2_compile_context *r); */ /* tags? */
+
+ /** \name String Append functions */
+
+@@ -385,13 +383,13 @@ int str_split(String *s, const char* del
+ expression syntax.
+ @result Newly allocated and compiled Regex object.
+ */
+-Regex *str_re_new(const char *re_str);
++pcre2_code *str_re_new(const unsigned char *re_str);
+
+ /** Free resources associated with regular expression object.
+ @param re Regex object to free
+ @note The object itself is freed also.
+ */
+-void str_re_free(Regex *re);
++void str_re_free(pcre2_compile_context *re);
+
+ /** Test whether the specified string matches the specified regex.
+ @pre The list 'l' must be initialized externally if non-NULL.
+@@ -403,11 +401,11 @@ void str_re_free(Regex *re);
+ on no match, and -2 on error.
+ @note NULLs will be added for all non-matching groups in list 'l'
+ @note In the list 'l', the 0th substring corresponds to the entire regex.
+- @note This function uses the pcre_exec function of the PCRE
++ @note This function uses the pcre2_exec function of the PCRE2
+ regex package.
+ @warning Substrings added to List l are newly allocated and must be
+ freed externally. */
+-int str_re_match(String *s, Regex *re, List *l, int nsubexp);
++int str_re_match(String *s, pcre2_compile_context *re, List *l, int nsubexp);
+
+ /** Search the specified string for the first instance of the specified
+ regex.
+@@ -415,14 +413,14 @@ int str_re_match(String *s, Regex *re, L
+ @param start_offset The first start_offset characters will be ignored.
+ @param l (Optional) If non-NULL, it will be populated with substrings corresponding
+ to subexpressions, as described under str_re_match.
+- @note This function uses the pcre_exec function of the PCRE regex package.
++ @note This function uses the pcre2_exec function of the PCRE2 regex package.
+ @result Index of first match, -1 if no match exists, or -2 if an
+ internal error occurs.
+ @warning Substrings added to List l are newly allocated and must be
+ freed externally.
+ @see str_re_match
+ */
+-int str_re_search(String *s, Regex *re, int start_offset, List *l,
++int str_re_search(String *s, pcre2_compile_context *re, int start_offset, List *l,
+ int nsubexp);
+
+ /** \} */
+--- a/src/lib/base/phast_stringsplus.c
++++ b/src/lib/base/phast_stringsplus.c
+@@ -12,7 +12,8 @@
+
+ $Id: stringsplus.c,v 1.12 2009-02-19 23:33:48 agd27 Exp $ */
+
+-#include <pcre.h>
++#define PCRE2_CODE_UNIT_WIDTH 8
++#include <pcre2.h>
+ #include "phast/stringsplus.h"
+ #include "phast/misc.h"
+ #include <stdlib.h>
+@@ -462,29 +463,29 @@ int str_ends_with_charstr(String *s, con
+ return (strncmp(&s->chars[s->length - len], substr, len) == 0);
+ }
+
+-Regex *str_re_new(const char *re_str) {
+- Regex *re;
+- const char *errstr;
+- int erroffset;
++pcre2_code *str_re_new(const unsigned char *re_str) {
++ pcre2_code *re;
++ int errorcode;
++ PCRE2_SIZE erroffset;
+
+- re = pcre_compile(re_str, 0, &errstr, &erroffset, NULL);
++ re = pcre2_compile(re_str, PCRE2_ZERO_TERMINATED, 0, &errorcode, &erroffset, NULL);
+ if (re == NULL) {
+- die("ERROR: cannot compile regular expression '%s' (%d): %s\n",
+- re_str, erroffset, errstr);
++ die("ERROR: cannot compile regular expression '%s' (%d): %d\n",
++ re_str, erroffset, errorcode);
+ }
+ return re;
+ }
+
+
+-//NOTE Regex are allocated by pcre; do not use sfree
+-void str_re_free(Regex *re) {
++//NOTE Regex are allocated by pcre2; do not use sfree
++void str_re_free(pcre2_compile_context *re) {
+ if (re != NULL)
+ free(re);
+ }
+
+
+ #define OVECCOUNT 300
+-int str_re_match_sub(String *s, Regex *re, List *l, int offset, int nsubexp,
++int str_re_match_sub(String *s, pcre2_compile_context *re, List *l, int offset, int nsubexp,
+ int *first_match) {
+ int i, len, rc, ovector[OVECCOUNT], rv;
+ String *substr;
+@@ -494,15 +495,15 @@ int str_re_match_sub(String *s, Regex *r
+ used or there will be a memory leak! */
+ if (l != NULL) lst_clear(l);
+
+- rc = pcre_exec(re, NULL, s->chars, s->length, offset, 0, ovector, OVECCOUNT);
+- if (rc == PCRE_ERROR_NOMATCH) return -1;
++ rc = pcre2_exec(re, NULL, s->chars, s->length, offset, 0, ovector, OVECCOUNT);
++ if (rc == PCRE2_ERROR_NOMATCH) return -1;
+ if (rc < 0) return -2; //any other error
+ if (first_match != NULL) (*first_match) = ovector[0];
+ rv = ovector[1]-ovector[0];
+ if (rc >= 0 && l != NULL) {
+ if (rc == 0) {
+ printf("nsubexp=%i rc=%i\n", nsubexp, rc);
+- fprintf(stderr, "Warning: pcre_exec only has room for %d captured substrings. May need to increase OVECCOUNT and re-compile\n", OVECCOUNT/3);
++ fprintf(stderr, "Warning: pcre2_exec only has room for %d captured substrings. May need to increase OVECCOUNT and re-compile\n", OVECCOUNT/3);
+ rc = OVECCOUNT/3;
+ }
+ for (i = 0; i < rc && i <= nsubexp; i++) {
+@@ -523,11 +524,11 @@ int str_re_match_sub(String *s, Regex *r
+ }
+
+
+-int str_re_match(String *s, Regex *re, List *l, int nsubexp) {
++int str_re_match(String *s, pcre2_compile_context *re, List *l, int nsubexp) {
+ return str_re_match_sub(s, re, l, 0, nsubexp, NULL);
+ }
+
+-int str_re_search(String *s, Regex *re, int start_offset, List *l,
++int str_re_search(String *s, pcre2_compile_context *re, int start_offset, List *l,
+ int nsubexp) {
+ int first_match_idx, rc;
+ rc = str_re_match_sub(s, re, l, start_offset, nsubexp, &first_match_idx);
+--- a/src/make-include.mk
++++ b/src/make-include.mk
+@@ -137,7 +137,7 @@ LIBS = -lphast -framework Accelerate -lc
+ else
+ ifdef CLAPACKPATH
+ ifneq ($(TARGETOS), Windows)
+- LIBS = -lphast -llapack -ltmglib -lblas -lc -lm -lpcre $(LDFLAGS)
++ LIBS = -lphast -llapack -ltmglib -lblas -lc -lm -lpcre2-8 $(LDFLAGS)
+ else
+ CFLAGS += -I${CLAPACKPATH}/INCLUDE -I${F2CPATH} -DPCRE_STATIC
+ LIBS = -lphast -lm ${CLAPACKPATH}/liblapack.a ${CLAPACKPATH}/libf2c.a ${CLAPACKPATH}/libblas.a
+--- a/src/dless/dlessP.c
++++ b/src/dless/dlessP.c
+@@ -196,7 +196,7 @@ void do_p_values(BDPhyloHmm *bdphmm, GFF
+ JumpProcess *jp;
+ List *types = lst_new_ptr(nnodes * 2), *type_lists = lst_new_ptr(nnodes * 2);
+ TreeModel *mod = bdphmm->phmm->mods[0]; /* nonconserved */
+- Regex *id_re = str_re_new(".*id \"([^\"]*)\"");
++ pcre2_compile_context *id_re = str_re_new(".*id \"([^\"]*)\"");
+ String *id = str_new(STR_SHORT_LEN);
+ List *l = lst_new_ptr(1);
+
+--- a/src/lib/base/phast_misc.c
++++ b/src/lib/base/phast_misc.c
+@@ -669,7 +669,7 @@ int draw_index(double *p, int size) {
+ character as well as "->" to indicate mapping. */
+ struct hash_table *make_name_hash(char *mapstr) {
+ Hashtable *retval = hsh_new(20);
+- Regex *map_re = str_re_new("^[[:space:]]*([A-Za-z0-9_]+)[[:space:]]*(->|=)[[:space:]]*([A-Za-z0-9_]+)[[:space:]]*");
++ pcre2_compile_context *map_re = str_re_new("^[[:space:]]*([A-Za-z0-9_]+)[[:space:]]*(->|=)[[:space:]]*([A-Za-z0-9_]+)[[:space:]]*");
+ List *mappings = lst_new_ptr(20), *names = lst_new_ptr(3);
+ String *s = str_new_charstr(mapstr);
+ int i;
+--- a/src/lib/feature/phast_bed.c
++++ b/src/lib/feature/phast_bed.c
+@@ -140,7 +140,7 @@ void gff_print_bed(FILE *OUTF, GFF_Set
+ if (lst_size(gff->features) == 0) return; /* now can assume at least one feature */
+
+ if (!use_groups) {
+- Regex *tag_val_re = str_re_new("[[:alnum:]_.]+[[:space:]]+(\"[^\"]*\"|[^[:space:]]+)");
++ pcre2_compile_context *tag_val_re = str_re_new("[[:alnum:]_.]+[[:space:]]+(\"[^\"]*\"|[^[:space:]]+)");
+ List *l = lst_new_ptr(2);
+ int ncols = 4;
+
+--- a/src/lib/feature/phast_category_map.c
++++ b/src/lib/feature/phast_category_map.c
+@@ -26,11 +26,11 @@ CategoryMap *cm_read(FILE *F) {
+ int cat, cat2, lineno, i, cm_read_error;
+ CategoryMap *cm = NULL;
+ CategoryRange *existing_range;
+- static Regex *cat_range_re = NULL;
+- static Regex *ncats_re = NULL;
+- static Regex *fill_re = NULL;
+- static Regex *label_re = NULL;
+- static Regex *extend_re = NULL;
++ static pcre2_compile_context *cat_range_re = NULL;
++ static pcre2_compile_context *ncats_re = NULL;
++ static pcre2_compile_context *fill_re = NULL;
++ static pcre2_compile_context *label_re = NULL;
++ static pcre2_compile_context *extend_re = NULL;
+ int has_dependencies = 0;
+
+ line = str_new(STR_SHORT_LEN);
+--- a/src/lib/feature/phast_gff.c
++++ b/src/lib/feature/phast_gff.c
+@@ -38,7 +38,7 @@ GFF_Set* gff_read_set(FILE *F) {
+ GFF_Feature *feat;
+ GFF_Set *set;
+ List *l, *substrs;
+- static Regex *spec_comment_re = NULL;
++ static pcre2_compile_context *spec_comment_re = NULL;
+
+ line = str_new(STR_LONG_LEN);
+ set = gff_new_set();
+@@ -267,7 +267,7 @@ GFF_Feature *gff_new_feature_genomic_pos
+ int score_is_null) {
+ GFF_Feature *retval = NULL;
+ List *substrs = lst_new_ptr(4);
+- static Regex *posre = NULL;
++ static pcre2_compile_context *posre = NULL;
+ if (posre == NULL)
+ posre = str_re_new("(chr[_a-zA-Z0-9]+):([0-9]+)-([0-9]+)([-+])?");
+
+@@ -667,7 +667,7 @@ void gff_sort_within_groups(GFF_Set *set
+ undefined values will be placed in a single group. */
+ void gff_group(GFF_Set *set, char *tag) {
+ char *tmpstr=smalloc((100+strlen(tag))*sizeof(char));
+- Regex *tag_re;
++ pcre2_compile_context *tag_re;
+ List *l = lst_new_ptr(1);
+ int est_no_groups = max(lst_size(set->features) / 10, 1);
+ Hashtable *hash = hsh_new(est_no_groups);
+--- a/src/lib/motif/phast_tfbs.c
++++ b/src/lib/motif/phast_tfbs.c
+@@ -137,8 +137,8 @@ List *pwm_read(const char *filename) {
+ List *l = lst_new_ptr(3);
+ List *probabilitiesStr = lst_new_ptr(4);
+ List *probabilitiesDbl;
+- Regex *pssm_re = NULL;
+- Regex *motif_name_re = NULL;
++ pcre2_compile_context *pssm_re = NULL;
++ pcre2_compile_context *motif_name_re = NULL;
+ int alphabetLength;
+
+ result = lst_new_ptr(1);
+@@ -215,7 +215,7 @@ int ms_alph_has_lowercase(MS *ms) {
+ MS *ms_read(const char *filename, const char *alphabet) {
+ List *names = lst_new_ptr(10);
+ List *seqs = lst_new_ptr(10);
+- static Regex *descrip_re = NULL;
++ static pcre2_compile_context *descrip_re = NULL;
+ int i, nseqs, j, do_toupper, line_no;
+ String *line = str_new(STR_MED_LEN);
+ List *l = lst_new_ptr(2);
+--- a/src/lib/msa/phast_local_alignment.c
++++ b/src/lib/msa/phast_local_alignment.c
+@@ -47,7 +47,7 @@ LocalPwAlignment *la_read_lav(FILE *F, i
+ int line_no=0;
+ LocalPwAlignment *lpwa = la_new();
+ List *fields = lst_new_ptr(6);
+- Regex *stanza_start_re = str_re_new("^([dshaxm])[[:space:]]*{");
++ pcre2_compile_context *stanza_start_re = str_re_new("^([dshaxm])[[:space:]]*{");
+ AlignmentBlock *aln_block = NULL;
+ char stanza_type = '\0';
+ int i;
+--- a/src/lib/msa/phast_msa.c
++++ b/src/lib/msa/phast_msa.c
+@@ -253,7 +253,7 @@ MSA *msa_create_copy(MSA *msa, int suff_
+ MSA *msa_read_fasta(FILE *F, char *alphabet) {
+ List *names = lst_new_ptr(10);
+ List *seqs = lst_new_ptr(10);
+- static Regex *descrip_re = NULL;
++ static pcre2_compile_context *descrip_re = NULL;
+ int maxlen, i, nseqs, j, do_toupper, line_no;
+ String *line = str_new(STR_MED_LEN);
+ List *l = lst_new_ptr(2);
+@@ -1921,7 +1921,7 @@ GFF_Set *msa_get_informative_feats(MSA *
+
+ /* read and return a single sequence from a FASTA file */
+ String *msa_read_seq_fasta(FILE *F) {
+- static Regex *descrip_re = NULL;
++ static pcre2_compile_context *descrip_re = NULL;
+ String *line = str_new(STR_MED_LEN);
+ String *seq = NULL;
+
+@@ -2581,7 +2581,7 @@ msa_format_type msa_format_for_content(F
+ msa_format_type retval = UNKNOWN_FORMAT;
+ String *line = str_new(STR_MED_LEN);
+ List *matches = lst_new_ptr(3);
+- Regex *ss_re, *phylip_re, *fasta_re, *lav_re, *maf_re;
++ pcre2_compile_context *ss_re, *phylip_re, *fasta_re, *lav_re, *maf_re;
+
+ //using peek instead of read as we don't want to affect file/stream position
+ str_peek_next_line(line, F);
+--- a/src/lib/msa/phast_multi_msa.c
++++ b/src/lib/msa/phast_multi_msa.c
+@@ -51,9 +51,9 @@
+ abort if the sequence contains a character not in the alphabet. */
+ Multi_MSA *multimsa_new_from_files(FILE *F) {
+
+- Regex *blocks_re = str_re_new("#[[:space:]]*BLOCKS[[:space:]]*=[[:space:]]*([0-9]+)");
+- Regex *alph_re = str_re_new("#[[:space:]]*ALPHABET[[:space:]]*=[[:space:]]*([A-Z]+)");
+- Regex *format_re = str_re_new("#[[:space:]]*FORMAT[[:space:]]*=[[:space:]]*([A-Z]+)");
++ pcre2_compile_context *blocks_re = str_re_new("#[[:space:]]*BLOCKS[[:space:]]*=[[:space:]]*([0-9]+)");
++ pcre2_compile_context *alph_re = str_re_new("#[[:space:]]*ALPHABET[[:space:]]*=[[:space:]]*([A-Z]+)");
++ pcre2_compile_context *format_re = str_re_new("#[[:space:]]*FORMAT[[:space:]]*=[[:space:]]*([A-Z]+)");
+
+ int i, num_msa, line_no=0;
+ char *msa_fname;
+--- a/src/lib/msa/phast_sufficient_stats.c
++++ b/src/lib/msa/phast_sufficient_stats.c
+@@ -649,7 +649,7 @@ void ss_write(MSA *msa, FILE *F, int sho
+ /* make reading order optional? alphabet argument overrides alphabet
+ in file (use NULL to use version in file) */
+ MSA* ss_read(FILE *F, char *alphabet) {
+- Regex *nseqs_re, *length_re, *tuple_size_re, *ntuples_re, *tuple_re,
++ pcre2_compile_context *nseqs_re, *length_re, *tuple_size_re, *ntuples_re, *tuple_re,
+ *names_re, *alph_re, *ncats_re, *order_re, *offset_re;
+ String *line, *alph = NULL;
+ int nseqs, length, tuple_size, ntuples, i, ncats = -99, header_done = 0,
+--- a/src/lib/phylo/phast_phylo_p_print.c
++++ b/src/lib/phylo/phast_phylo_p_print.c
+@@ -749,7 +749,7 @@ void print_feats_generic(FILE *outfile,
+ String *name;
+ va_list ap;
+ double *data[ncols+1];
+- Regex *tag_val_re = str_re_new("[[:alnum:]_.]+[[:space:]]+(\"[^\"]*\"|[^[:space:]]+)");
++ pcre2_compile_context *tag_val_re = str_re_new("[[:alnum:]_.]+[[:space:]]+(\"[^\"]*\"|[^[:space:]]+)");
+ List *l = lst_new_ptr(2);
+ char **colname;
+ List **resultList=NULL;
+--- a/src/prequel/phast_pbs_code.c
++++ b/src/prequel/phast_pbs_code.c
+@@ -85,7 +85,7 @@ void pbs_free(PbsCode *code) {
+ }
+
+ PbsCode *pbs_new_from_file(FILE *F) {
+- Regex *nrows_re = str_re_new("##NROWS[[:space:]]*=[[:space:]]*([0-9]+)"),
++ pcre2_compile_context *nrows_re = str_re_new("##NROWS[[:space:]]*=[[:space:]]*([0-9]+)"),
+ *dimension_re = str_re_new("##DIMENSION[[:space:]]*=[[:space:]]*([0-9]+)"),
+ *nbytes_re = str_re_new("##NBYTES[[:space:]]*=[[:space:]]*([0-9]+)"),
+ *codesize_re = str_re_new("##CODESIZE[[:space:]]*=[[:space:]]*([0-9]+)");
+--- a/src/util/msa_view.c
++++ b/src/util/msa_view.c
+@@ -358,7 +358,7 @@ OPTIONS:\n\
+
+ void fill_with_Ns(MSA *msa, List *fill_N_list, msa_coord_map *map) {
+ int i, j, nseq, nstart, nend;
+- Regex* fill_N_re = str_re_new("([[:digit:]]+):([[:digit:]]+)-([[:digit:]]+)");
++ pcre2_compile_context* fill_N_re = str_re_new("([[:digit:]]+):([[:digit:]]+)-([[:digit:]]+)");
+ List *word_list = lst_new_ptr(4);
+ for (i = 0; i < lst_size(fill_N_list); i++) {
+ String *s = lst_get_ptr(fill_N_list, i);
=====================================
debian/patches/series
=====================================
@@ -4,3 +4,4 @@ use_debian_packaged_libpcre.patch
use_debian_packaged_help2man.patch
hardening.patch
fix_test_makefile.patch
+pcre2.patch
View it on GitLab: https://salsa.debian.org/med-team/phast/-/commit/a4cc806f7b1ba38d10769db7f74f57d030e2b7fc
--
View it on GitLab: https://salsa.debian.org/med-team/phast/-/commit/a4cc806f7b1ba38d10769db7f74f57d030e2b7fc
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20211118/eaf5a7f6/attachment-0001.htm>
More information about the debian-med-commit
mailing list