[Rpm-maint] [Suse patch] Convert changelog and i18n headers to current locale

Panu Matilainen pmatilai at redhat.com
Mon Jun 4 12:56:09 UTC 2007


I can kinda see the point behind this, but it's just scary. Wouldn't 
this be a job for iconv or something instead?

Further, as pointed out here by JBJ, it doesn't really solve the problem 
of potentially having multiple encodings present in the spec: 
https://lists.dulug.duke.edu/pipermail/rpm-devel/2006-March/000931.html

Character encoding and conversion business makes me want to stick my head 
in the sand (and stay there)...

 	- Panu -

---

Convert changelog and i18n header elements to current locale.
[#43347], rh#140050

Already in rpm-4.4.7

--- ./lib/formats.c.orig	2005-01-26 04:46:54.000000000 +0000
+++ ./lib/formats.c	2006-03-17 15:27:06.000000000 +0000
@@ -2,6 +2,7 @@
   * \file lib/formats.c
   */

+#include <wchar.h>
  #include "system.h"
  #include "rpmio_internal.h"
  #include <rpmlib.h>
@@ -18,6 +19,128 @@
  /*@access pgpDig @*/
  /*@access pgpDigParams @*/

+/**
+ * Convert a string to the encoding of the current locale.
+ * The input is first decoded as UTF-8; if a multi-byte sequence is broken
+ * or truncated, every byte is instead taken as one Latin-1 character.
+ * The locale counts as UTF-8 when wcrtomb() turns the wide character
+ * 0x20ac into the three bytes E2 82 AC.  If string and locale agree
+ * (both UTF-8 or both not), str is returned untouched.  Otherwise str is
+ * handed to _free() and a newly allocated converted copy is returned.
+ * A character wcrtomb() rejects is copied as a raw byte when it is below
+ * 256 and the shift state is initial, and replaced by '?' otherwise.
+ * @param str		string to convert; handed to _free() when converted
+ * @return		str or a newly allocated string, 0 if str is 0
+ */
+static const char * strtolocale(const char *str)
+{
+    wchar_t *wstr, *wp;
+    const unsigned char *cp;
+    char *cc;
+    unsigned int state = 0;	/* pending sequence; bit 31 stays set while bytes are expected */
+    unsigned int c;		/* unsigned so that state << 6 below is well defined */
+    int ccl, cca, mb_cur_max;
+    size_t l;
+    mbstate_t ps;
+    int strisutf8 = 1;
+    int locisutf8 = 1;
+
+    if (!str)
+	return 0;
+    if (!*str)
+	return str;
+    wstr = (wchar_t *)xmalloc((strlen(str) + 1) * sizeof(*wstr));	/* at most one wide char per input byte */
+    wp = wstr;
+    cp = (const unsigned char *)str;
+    while ((c = *cp++) != 0) {
+	if (state) {
+	    if ((c & 0xc0) != 0x80) {
+		/* encoding error: leave with state set so the Latin-1 fallback runs */
+		break;
+	    }
+	    c = (c & 0x3f) | (state << 6);
+	    if (!(state & 0x40000000)) {
+	      /* check for overlong sequences */
+	        if ((c & 0x820823e0) == 0x80000000)
+		    c = 0xfdffffff;
+	        else if ((c & 0x020821f0) == 0x02000000)
+		    c = 0xfff7ffff;
+	        else if ((c & 0x000820f8) == 0x00080000)
+		    c = 0xffffd000;
+	        else if ((c & 0x0000207c) == 0x00002000)
+		    c = 0xffffff70;
+	    }
+	} else {
+	    /* new sequence */
+	    if (c >= 0xfe)
+		c = 0xfffd;
+	    else if (c >= 0xfc)
+		c = (c & 0x01) | 0xbffffffc;    /* 5 bytes to follow */
+	    else if (c >= 0xf8)
+		c = (c & 0x03) | 0xbfffff00;    /* 4 */
+	    else if (c >= 0xf0)
+		c = (c & 0x07) | 0xbfffc000;    /* 3 */
+	    else if (c >= 0xe0)
+		c = (c & 0x0f) | 0xbff00000;    /* 2 */
+	    else if (c >= 0xc2)
+		c = (c & 0x1f) | 0xfc000000;    /* 1 */
+	    else if (c >= 0xc0)
+		c = 0xfdffffff;         /* overlong */
+	    else if (c >= 0x80)
+		c = 0xfffd;
+        }
+	state = (c & 0x80000000) ? c : 0;
+	if (state)
+	    continue;
+	*wp++ = (wchar_t)c;
+    }
+    if (state) {
+	/* encoding error, assume latin1 */
+        strisutf8 = 0;
+	cp = (const unsigned char *)str;
+	wp = wstr;
+	while ((c = *cp++) != 0) {
+	    *wp++ = (wchar_t)c;
+	}
+    }
+    *wp = 0;
+    mb_cur_max = MB_CUR_MAX;
+    memset(&ps, 0, sizeof(ps));
+    cc = xmalloc(mb_cur_max);
+    /* test locale encoding */
+    if (wcrtomb(cc, 0x20ac, &ps) != 3 || memcmp(cc, "\342\202\254", 3))
+	locisutf8 = 0;
+    if (locisutf8 == strisutf8) {
+	wstr = _free(wstr);
+	cc = _free(cc);		/* probe buffer is not returned on this path */
+	return str;
+    }
+    str = _free(str);
+    memset(&ps, 0, sizeof(ps));
+    ccl = cca = 0;
+    for (wp = wstr; ; wp++) {
+	l = wcrtomb(cc + ccl, *wp, &ps);	/* also run for the final 0 so the output gets terminated */
+	if (*wp == 0)
+	    break;
+	if (l == (size_t)-1) {
+	    if (*wp < (wchar_t)256 && mbsinit(&ps)) {
+		cc[ccl] = *wp;
+		l = 1;
+	    } else
+	        l = wcrtomb(cc + ccl, (wchar_t)'?', &ps);
+	}
+        if (l == 0 || l == (size_t)-1)
+	    continue;
+        ccl += l;
+        if (ccl > cca) {
+	    cca = ccl + 16;
+	    cc = xrealloc(cc, cca + mb_cur_max);	/* keep mb_cur_max bytes of slack past cca */
+	}
+    }
+    wstr = _free(wstr);
+    return (const char *)cc;
+}
+
  /**
   * Identify type of trigger.
   * @param type		tag type
@@ -1077,6 +1220,7 @@ static int i18nTag(Header h, int_32 tag,

      if (rc && (*data) != NULL) {
  	*data = xstrdup(*data);
+	*data = strtolocale(*data);
  	*freeData = 1;
  	return 0;
      }
@@ -1088,6 +1232,56 @@ static int i18nTag(Header h, int_32 tag,
  }

  /**
+ * Retrieve a tag and, for string and string-array data, convert each string with strtolocale(); returns 0 on success, 1 if nothing was retrieved.
+ */
+static int localeTag(Header h, int_32 tag, /*@out@*/ rpmTagType * type,
+		/*@out@*/ const void ** data, /*@out@*/ int_32 * count,
+		/*@out@*/ int * freeData)
+{
+    HGE_t hge = (HGE_t)headerGetEntryMinMemory;
+    rpmTagType t;
+    char **d, **d2, *dp;
+    int rc, i, l;
+
+    rc = hge(h, tag, &t, (void **)&d, count);
+    if (!rc || d == NULL || *count == 0) {	/* lookup failed or nothing to return */
+	*freeData = 0;
+	*data = NULL;
+	*count = 0;
+	return 1;
+    }
+    if (type)
+	*type = t;
+    if (t == RPM_STRING_TYPE) {
+	d = (char **)xstrdup((char *)d);	/* private copy: strtolocale() may _free() its argument */
+	d = (char **)strtolocale((char *)d);
+	*freeData = 1;
+    } else if (t == RPM_STRING_ARRAY_TYPE) {
+	l = 0;	/* total bytes needed for all converted strings, terminators included */
+        for (i = 0; i < *count; i++) {
+	    d[i] = xstrdup(d[i]);
+	    d[i] = (char *)strtolocale(d[i]);
+	    l += strlen(d[i]) + 1;
+	}
+	d2 = xmalloc(*count * sizeof(char *) + l);	/* one allocation: pointer table, then the string bytes */
+	dp = (char *)(d2 + *count);
+        for (i = 0; i < *count; i++) {
+	    d2[i] = dp;
+	    strcpy(dp, d[i]);
+	    dp += strlen(dp) + 1;
+	    d[i] = _free(d[i]);
+	}
+	d = _free(d);	/* release the array obtained from hge() */
+	d = d2;
+	*freeData = 1;
+    } else	/* any other tag type is passed through unconverted */
+	*freeData = 0;
+    *data = (void **)d;
+    return 0;
+}
+
+
+/**
   * Retrieve summary text.
   * @param h		header
   * @retval *type	tag type
@@ -1127,6 +1321,20 @@ static int descriptionTag(Header h, /*@o
      return i18nTag(h, RPMTAG_DESCRIPTION, type, data, count, freeData);
  }

+static int changelognameTag(Header h, /*@out@*/ rpmTagType * type,
+		/*@out@*/ const void ** data, /*@out@*/ int_32 * count,
+		/*@out@*/ int * freeData)
+{   /* Fetch RPMTAG_CHANGELOGNAME through localeTag() and return its result. */
+    return localeTag(h, RPMTAG_CHANGELOGNAME, type, data, count, freeData);
+}
+
+static int changelogtextTag(Header h, /*@out@*/ rpmTagType * type,
+		/*@out@*/ const void ** data, /*@out@*/ int_32 * count,
+		/*@out@*/ int * freeData)
+{   /* Fetch RPMTAG_CHANGELOGTEXT through localeTag() and return its result. */
+    return localeTag(h, RPMTAG_CHANGELOGTEXT, type, data, count, freeData);
+}
+
  /**
   * Retrieve group text.
   * @param h		header
@@ -1152,6 +1360,8 @@ const struct headerSprintfExtension_s rp
      { HEADER_EXT_TAG, "RPMTAG_GROUP",		{ groupTag } },
      { HEADER_EXT_TAG, "RPMTAG_DESCRIPTION",	{ descriptionTag } },
      { HEADER_EXT_TAG, "RPMTAG_SUMMARY",		{ summaryTag } },
+    { HEADER_EXT_TAG, "RPMTAG_CHANGELOGNAME",	{ changelognameTag } },
+    { HEADER_EXT_TAG, "RPMTAG_CHANGELOGTEXT",	{ changelogtextTag } },
      { HEADER_EXT_TAG, "RPMTAG_FILECLASS",	{ fileclassTag } },
      { HEADER_EXT_TAG, "RPMTAG_FILECONTEXTS",	{ filecontextsTag } },
      { HEADER_EXT_TAG, "RPMTAG_FILENAMES",	{ filenamesTag } },



More information about the Rpm-maint mailing list