[COMMIT] Have the result of coding_character_tell() reflect str->convert_to, too.

Wednesday, 22 January 2014

APPROVE COMMIT

NOTE: This patch has been committed.

# HG changeset patch
# User Aidan Kehoe <kehoea(a)parhasard.net>
# Date 1390413120 0
# Node ID 0cb4f494a54809a054e242ee16c00c0e9c34d39d
# Parent  cfc6a8c144f16b2598435cfb0aa8a2a48204c947
Have the result of coding_character_tell() reflect str->convert_to, too.

src/ChangeLog addition:

2014-01-22  Aidan Kehoe  <kehoea(a)parhasard.net>

	* text.c (buffered_bytecount_to_charcount): New.
	Return the number of characters in a coding or lstream buffer,
	including partial characters at the beginning of the buffer, but
	not including those at the end. Refactored from
	Lstream_character_tell().
	* text.h:
	* text.h (buffered_bytecount_to_charcount): Declare it.
	* lstream.c (Lstream_character_tell):
	Use the refactored buffered_bytecount_to_charcount () here, both
	for the unget buffer and in_buffer.
	* file-coding.c (coding_character_tell):
	Check the character count of the lstream buffer too, when passing
	back the character count from the coding stream.

diff -r cfc6a8c144f1 -r 0cb4f494a548 src/ChangeLog
--- a/src/ChangeLog	Tue Jan 21 00:27:16 2014 +0000
+++ b/src/ChangeLog	Wed Jan 22 17:52:00 2014 +0000
@@ -1,3 +1,19 @@
+2014-01-22  Aidan Kehoe  <kehoea(a)parhasard.net>
+
+	* text.c (buffered_bytecount_to_charcount): New.
+	Return the number of characters in a coding or lstream buffer,
+	including partial characters at the beginning of the buffer, but
+	not including those at the end. Refactored from
+	Lstream_character_tell().
+	* text.h:
+	* text.h (buffered_bytecount_to_charcount): Declare it.
+	* lstream.c (Lstream_character_tell):
+	Use the refactored buffered_bytecount_to_charcount () here, both
+	for the unget buffer and in_buffer.
+	* file-coding.c (coding_character_tell):
+	Check the character count of the lstream buffer too, when passing
+	back the character count from the coding stream.
+
 2014-01-21  Aidan Kehoe  <kehoea(a)parhasard.net&gt;

 	* redisplay.c (start_with_line_at_pixpos):
diff -r cfc6a8c144f1 -r 0cb4f494a548 src/file-coding.c
--- a/src/file-coding.c	Tue Jan 21 00:27:16 2014 +0000
+++ b/src/file-coding.c	Wed Jan 22 17:52:00 2014 +0000
@@ -1994,8 +1994,19 @@
 coding_character_tell (Lstream *stream)
 {
   struct coding_stream *str = CODING_STREAM_DATA (stream);
-
-  return XCODESYSMETH_OR_GIVEN (str->codesys, character_tell, (str), -1);
+  Charcount ctell
+    = XCODESYSMETH_OR_GIVEN (str->codesys, character_tell, (str), -1);
+  
+  if (ctell > 0 && Dynarr_length (str->convert_to) > 0)
+    {
+      ctell
+        -= buffered_bytecount_to_charcount ((const Ibyte *)
+                                            (Dynarr_begin (str->convert_to)),
+                                            Dynarr_length (str->convert_to));
+      text_checking_assert (ctell >= 0);
+    }
+
+  return ctell;
 }

 static int
diff -r cfc6a8c144f1 -r 0cb4f494a548 src/lstream.c
--- a/src/lstream.c	Tue Jan 21 00:27:16 2014 +0000
+++ b/src/lstream.c	Wed Jan 22 17:52:00 2014 +0000
@@ -752,15 +752,10 @@
           /* The character count should not include those characters
              currently *in* the unget buffer, subtract that count.  */
           Ibyte *ungot, *ungot_ptr;
-          Bytecount ii = lstr->unget_buffer_ind, impartial, sevenflen;
+          Bytecount ii = lstr->unget_buffer_ind;

           ungot_ptr = ungot
-            = alloca_ibytes (lstr->unget_buffer_ind) + MAX_ICHAR_LEN;
-
-          /* Make sure the string starts with a valid ibyteptr, otherwise
-             validate_ibyte_string_backward could run off the beginning. */
-          sevenflen = set_itext_ichar (ungot, (Ichar) 0x7f);
-          ungot_ptr += sevenflen;
+            = alloca_ibytes (lstr->unget_buffer_ind);

           /* Internal format data, but in reverse order. There's not
              actually a need to alloca here, we could work out the character
@@ -772,90 +767,23 @@
               *ungot_ptr++ = lstr->unget_buffer[--ii];
             }

-          impartial
-            = validate_ibyte_string_backward (ungot, ungot_ptr - ungot);
-
-          /* Move past the character we added. */
-          impartial -= sevenflen;
-          INC_IBYTEPTR (ungot);
-
-          if (impartial > 0 && !valid_ibyteptr_p (ungot))
-            {
-              Ibyte *newstart = ungot, *limit = ungot + impartial;
-              /* Our consumer has the start of a partial character, we
-                 have the rest. */
-
-              while (!valid_ibyteptr_p (newstart) && newstart < limit)
-                {
-                  newstart++, impartial--;
-                }
-                  
-              /* Remove this character from the count, since the
-                 end-consumer hasn't seen the full character. */
-              ctell--;
-              ungot = newstart;
-            }
-          else if (valid_ibyteptr_p (ungot)
-                   && rep_bytes_by_first_byte (*ungot) > impartial)
-            {
-              /* Rest of a partial character has yet to be read, its first
-                 octet has probably been unread by Lstream_read_1(). We
-                 included it in the accounting in Lstream_unread(), adjust
-                 the figure here appropriately. */
-              ctell--;
-            }
-
-          /* bytecount_to_charcount will throw an assertion failure if we're
-             not at the start of a character. */
-          text_checking_assert (impartial == 0 || valid_ibyteptr_p (ungot));
-
           /* The character length of this text is included in
              unget_character_count; if the bytes are still in the unget
              buffer, then our consumers haven't seen them, and so the
              character tell figure shouldn't reflect them. Subtract it from
              the total.  */
-          ctell -= bytecount_to_charcount (ungot, impartial);
+          ctell
+            -= buffered_bytecount_to_charcount (ungot, ungot_ptr - ungot);
         }

       if (lstr->in_buffer_ind < lstr->in_buffer_current)
         {
-          Ibyte *inbuf = lstr->in_buffer + lstr->in_buffer_ind;
-          Bytecount partial = lstr->in_buffer_current - lstr->in_buffer_ind,
-            impartial;
-
-          if (!valid_ibyteptr_p (inbuf))
-            {
-              Ibyte *newstart = inbuf;
-              Ibyte *limit = lstr->in_buffer + lstr->in_buffer_current;
-              /* Our consumer has the start of a partial character, we
-                 have the rest. */
-
-              while (newstart < limit && !valid_ibyteptr_p (newstart))
-                {
-                  newstart++;
-                }
-                  
-              /* Remove this character from the count, since the
-                 end-consumer hasn't seen the full character. */
-              ctell--;
-              inbuf = newstart;
-              partial = limit - newstart;
-            }
-
-          if (valid_ibyteptr_p (inbuf)) 
-            {
-              /* There's at least one valid starting char in the string,
-                 validate_ibyte_string_backward won't run off the
-                 begining. */
-              impartial = 
-                validate_ibyte_string_backward (inbuf, partial);
-            }
-          else
-            {
-              impartial = 0;
-            }
-
-          ctell -= bytecount_to_charcount (inbuf, impartial);
+          ctell
+            -= buffered_bytecount_to_charcount ((const Ibyte *)
+                                                (lstr->in_buffer
+                                                 + lstr->in_buffer_ind),
+                                                lstr->in_buffer_current
+                                                - lstr->in_buffer_ind);
         }

       text_checking_assert (ctell >= 0);
diff -r cfc6a8c144f1 -r 0cb4f494a548 src/text.c
--- a/src/text.c	Tue Jan 21 00:27:16 2014 +0000
+++ b/src/text.c	Wed Jan 22 17:52:00 2014 +0000
@@ -2241,6 +2241,60 @@
   return count;
 }

+/* Return the character count of an lstream or coding buffer of
+   internal-format text, counting partial characters at the beginning of the
+   buffer as whole characters, and *not* counting partial characters at the
+   end of the buffer. This is because the result of this function is
+   subtracted from the character count given by the coding system character
+   tell methods, which include the former but not the latter. */
+
+Charcount
+buffered_bytecount_to_charcount (const Ibyte *bufptr, Bytecount len)
+{
+  Boolint partial_first = 0;
+  Bytecount impartial;
+
+  if (valid_ibyteptr_p (bufptr))
+    {
+      if (rep_bytes_by_first_byte (*bufptr) > len)
+        {
+          /* This is a partial first character, include it. Return
+             immediately so validate_ibyte_string_backward doesn't run off
+             the beginning of the string. */
+          return (Charcount) 1;
+        }
+    }
+  else
+    {
+      const Ibyte *newstart = bufptr, *limit = newstart + len;
+
+      /* Our consumer has the start of a partial character, we have the
+         rest. */
+      while (newstart < limit && !valid_ibyteptr_p (newstart))
+        {
+          newstart++;
+        }
+                  
+      partial_first = 1;
+      bufptr = newstart;
+      len = limit - newstart;
+    }
+
+  if (len && valid_ibyteptr_p (bufptr))
+    {
+      /* There's at least one valid starting char in the string,
+         validate_ibyte_string_backward won't run off the begining. */
+      impartial = validate_ibyte_string_backward (bufptr, len);
+    }
+  else
+    {
+      impartial = 0;
+    }
+
+  return (Charcount) partial_first + bytecount_to_charcount (bufptr,
+                                                             impartial);
+}
+
 Bytecount
 charcount_to_bytecount_fun (const Ibyte *ptr, Charcount len)
 {
diff -r cfc6a8c144f1 -r 0cb4f494a548 src/text.h
--- a/src/text.h	Tue Jan 21 00:27:16 2014 +0000
+++ b/src/text.h	Wed Jan 22 17:52:00 2014 +0000
@@ -908,6 +908,12 @@
   return ptr;
 }

+/* Return the character count of an lstream or coding buffer of internal
+   format text, counting partial characters at the beginning of the buffer
+   as whole characters, and *not* counting partial characters at the end of
+   the buffer. */
+Charcount buffered_bytecount_to_charcount (const Ibyte *, Bytecount len);
+
 #else

 #define bytecount_to_charcount(ptr, len) ((Charcount) (len))
@@ -916,6 +922,7 @@
 #define charcount_to_bytecount_fmt(ptr, len, fmt) ((Bytecount) (len))
 #define skip_ascii(ptr, end) end
 #define skip_ascii_down(ptr, end) end
+#define buffered_bytecount_to_charcount (ptr, len) (len)

 #endif /* MULE */

-- 
‘Liston operated so fast that he once accidentally amputated an assistant’s 
fingers along with a patient’s leg, […] The patient and the assistant both 
died of sepsis, and a spectator reportedly died of shock, resulting in the 
only known procedure with a 300% mortality.’ (Atul Gawande, NEJM, 2012)

_______________________________________________
XEmacs-Patches mailing list
XEmacs-Patches(a)xemacs.org
http://lists.xemacs.org/mailman/listinfo/xemacs-patches

2025

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

2009

2008

2007

2006

2005

2004

2003

[COMMIT] Have the result of coding_character_tell() reflect str->convert_to, too.