Enable/disable ANSI escape sequences on screen lines instead of only following XHTML open/close tags.
rajeevvp opened this issue · comments
`epub2txt` should, I think, set and reset ANSI escape sequences for things like `<em>` tags
on each on-screen line, instead of only setting/resetting them when the open/close tags are
encountered.
Otherwise, since `less -R` resets ANSI colour attributes automatically at the end of each line,
when the text affected by an escape sequence spans multiple on-screen lines, only the first
line has the proper colour attribute.
`more` doesn't have this problem, and neither does `less -r`, but both can potentially mess up
the display in other ways.
Thanks,
RVP
And, here is a minimally invasive patch to do just that:
diff -urN epub2txt2-master.orig/src/wrap.c epub2txt2-master/src/wrap.c
--- epub2txt2-master.orig/src/wrap.c 2020-01-04 22:30:09.000000000 +0530
+++ epub2txt2-master/src/wrap.c 2020-01-09 14:23:59.128292592 +0530
@@ -33,7 +33,9 @@
int state;
int column;
int white_count;
+ unsigned int fmt;
void *app_data;
+ void *app_opts;
BOOL blank_line;
WT_UTF32 last;
WT_UTF32 *token;
@@ -138,6 +140,7 @@
if (l + context->priv->column + 1 >= context->priv->width)
{
_wraptext_emit_newline (context);
+ xhtml_emit_fmt_eol (context);
context->priv->column = 0;
}
@@ -330,6 +333,7 @@
self->priv->column = 0;
self->priv->last = 0;
self->priv->white_count = 0;
+ self->priv->fmt = 0;
self->priv->blank_line = TRUE;
if (self->priv->token) free (self->priv->token);
self->priv->token = NULL;
@@ -353,6 +357,36 @@
self->priv->flags = flags;
}
+void wraptext_context_zero_fmt (WrapTextContext *self)
+ {
+ self->priv->fmt = 0;
+ }
+
+unsigned int wraptext_context_get_fmt (WrapTextContext *self)
+ {
+ return self->priv->fmt;
+ }
+
+void wraptext_context_set_fmt (WrapTextContext *self, unsigned int fmt)
+ {
+ self->priv->fmt |= fmt;
+ }
+
+void wraptext_context_reset_fmt (WrapTextContext *self, unsigned int fmt)
+ {
+ self->priv->fmt &= ~fmt;
+ }
+
+void wraptext_context_set_app_opts (WrapTextContext *self, void *app_opts)
+ {
+ self->priv->app_opts = app_opts;
+ }
+
+void *wraptext_context_get_app_opts (WrapTextContext *self)
+ {
+ return self->priv->app_opts;
+ }
+
void wraptext_context_set_app_data (WrapTextContext *self, void *app_data)
{
self->priv->app_data = app_data;
diff -urN epub2txt2-master.orig/src/wrap.h epub2txt2-master/src/wrap.h
--- epub2txt2-master.orig/src/wrap.h 2020-01-04 22:30:09.000000000 +0530
+++ epub2txt2-master/src/wrap.h 2020-01-09 14:22:44.780569828 +0530
@@ -38,6 +38,13 @@
void wraptext_context_set_output_fn (WrapTextContext *self,
WrapTextOutputFn fn);
+unsigned int wraptext_context_get_fmt (WrapTextContext *self);
+void wraptext_context_zero_fmt (WrapTextContext *self);
+void wraptext_context_set_fmt (WrapTextContext *self, unsigned int fmt);
+void wraptext_context_reset_fmt (WrapTextContext *self, unsigned int fmt);
+void wraptext_context_set_app_opts (WrapTextContext *self, void *app_opts);
+void *wraptext_context_get_app_opts (WrapTextContext *self);
+
void wraptext_context_set_flags (WrapTextContext *self, int flags);
void wraptext_context_set_width (WrapTextContext *self, int width);
diff -urN epub2txt2-master.orig/src/xhtml.c epub2txt2-master/src/xhtml.c
--- epub2txt2-master.orig/src/xhtml.c 2020-01-04 22:30:09.000000000 +0530
+++ epub2txt2-master/src/xhtml.c 2020-01-09 14:40:49.054307798 +0530
@@ -35,6 +35,8 @@
FORMAT_H4_ON, FORMAT_H4_OFF,
FORMAT_H5_ON, FORMAT_H5_OFF } Format;
+enum { FMT_BOLD = 0x01, FMT_ITAL = 0x02 }; /* bitmasks */
+
/*============================================================================
xhtml_is_start_format_tag
@@ -208,6 +210,83 @@
OUT
}
+/*============================================================================
+ xhtml_emit_fmt_eol
+============================================================================*/
+void xhtml_emit_fmt_eol (WrapTextContext *context)
+ {
+ IN
+
+ unsigned int fmt = wraptext_context_get_fmt(context);
+ const Epub2TxtOptions *options = (Epub2TxtOptions *) wraptext_context_get_app_opts(context);
+
+ if (options->ansi && !options->raw && fmt)
+ {
+ /* reset ANSI escape-sequence at EOL. */
+ xhtml_emit_format(options, FORMAT_BOLD_OFF);
+
+ /* turn those set back on at BOL. */
+ if (fmt & FMT_BOLD)
+ xhtml_emit_format(options, FORMAT_BOLD_ON);
+ if (fmt & FMT_ITAL)
+ xhtml_emit_format(options, FORMAT_ITALIC_ON);
+
+ }
+ OUT
+ }
+
+/*============================================================================
+ xhtml_set_format
+============================================================================*/
+void xhtml_set_format (const Epub2TxtOptions *options, Format format, WrapTextContext *context)
+ {
+ IN
+
+ if (options->ansi && !options->raw)
+ {
+ switch (format)
+ {
+ case FORMAT_BOLD_ON:
+ wraptext_context_set_fmt(context, FMT_BOLD);
+ break;
+
+ case FORMAT_BOLD_OFF:
+ wraptext_context_reset_fmt(context, FMT_BOLD);
+ break;
+
+ case FORMAT_ITALIC_ON:
+ wraptext_context_set_fmt(context, FMT_ITAL);
+ break;
+
+ case FORMAT_ITALIC_OFF:
+ wraptext_context_reset_fmt(context, FMT_ITAL);
+ break;
+
+ case FORMAT_NONE:
+ wraptext_context_zero_fmt(context);
+ break;
+
+ case FORMAT_H1_ON:
+ case FORMAT_H2_ON:
+ case FORMAT_H3_ON:
+ case FORMAT_H4_ON:
+ case FORMAT_H5_ON:
+ wraptext_context_set_fmt(context, FMT_BOLD);
+ break;
+
+ case FORMAT_H1_OFF:
+ case FORMAT_H2_OFF:
+ case FORMAT_H3_OFF:
+ case FORMAT_H4_OFF:
+ case FORMAT_H5_OFF:
+ wraptext_context_reset_fmt(context, FMT_BOLD);
+ break;
+
+ }
+ }
+ OUT
+ }
+
@@ -629,6 +708,7 @@
WrapTextContext *context = wraptext_context_new();
wraptext_context_set_width (context, width);
+ wraptext_context_set_app_opts (context, (void *)options);
Mode mode = MODE_ANY;
BOOL inbody = FALSE;
@@ -771,6 +851,7 @@
xhtml_flush_line (para, options, context);
wstring_clear (para);
xhtml_emit_format (options, format);
+ xhtml_set_format(options, format, context);
}
}
else if (xhtml_is_end_format_tag (ss_tag, &format))
@@ -779,6 +860,7 @@
{
xhtml_flush_line (para, options, context);
xhtml_emit_format (options, format);
+ xhtml_set_format(options, format, context);
wstring_clear (para);
}
}
@@ -786,6 +868,7 @@
{
xhtml_flush_line (para, options, context);
xhtml_emit_format (options, format);
+ xhtml_set_format(options, format, context);
wstring_clear (para);
xhtml_para_break (context, options);
}
@@ -795,6 +878,7 @@
xhtml_flush_line (para, options, context);
wstring_clear (para);
xhtml_emit_format (options, format);
+ xhtml_set_format(options, format, context);
}
free (ss_tag);