Source code for xls2xlsx.utils

from fractions import Fraction
from openpyxl.utils.datetime import from_excel
# broken! from openpyxl.styles.numbers import is_date_format
from openpyxl.styles.numbers import STRIP_RE
from datetime import datetime, date, timedelta
from datetime import time as tm
import re

TRACE=False

[docs]def is_date_format(fmt): # The one in openpyxl doesn't work properly! if fmt is None: return False fmt = fmt.split(";")[0] # only look at the first format fmt = STRIP_RE.sub("", fmt) # ignore some formats return re.search(r"(?<!\\)[dmhysDMHYS]", fmt) is not None
[docs]def perform_number_format(value, number_format): """This is a half-baked attempt at formatting the given value using the given Excel number_format. This is used by the tests to match values. Handled is many of the formats for numbers (int/float), datetime, date, time, and timedelta.""" if number_format == 'General' or isinstance(value, str): return value if number_format == '@': return str(value) grabit = [] def grab_escapes(number_format): nonlocal grabit def sub_grabit(m): i = len(grabit) grabit.append(m.group(1)) return f'{{{i}}}' nf = re.sub(r'\\(.)', sub_grabit, number_format) nf = re.sub(r'"([^"]*)"', sub_grabit, nf) nf = re.sub(r'\[(hh|h|mm|m|ss|s)\]', r'<\1>', nf) # So we don't match the next rule with [h] nf = re.sub(r'\[[^\]]+\]', '', nf) # Remove [Blue], [$-F800], etc nf = re.sub(r'<(hh|h|mm|m|ss|s)>', r'[\1]', nf) # Put back the [h] etc return nf def restore_escapes(nf): nonlocal grabit if len(grabit): nf = nf.format(*grabit) # Put escaped chars back in return nf if TRACE: print(f'perform_number_format({value}, {number_format})') if (isinstance(value, int) or isinstance(value, float)) and is_date_format(number_format): if '[h' in number_format or '[m' in number_format or '[s' in number_format: value = timedelta(days=value) else: value = from_excel(value) if isinstance(value, int) or isinstance(value, float): # Note: This is NOT a full implementation of Excel int/float number formatting! format_split = number_format.split(';') number_format = format_split[0] prefix = '' suffix = '' if value < 0 and len(format_split) >= 2: number_format = format_split[1] value = abs(value) elif value == 0 and len(format_split) >= 3: number_format = format_split[2] if not number_format: return '' nf = grab_escapes(number_format) fmt = 'f' if isinstance(value, int): fmt = 'd' if '%' in nf: fmt = '%' elif 'E' in nf: fmt = 'E' nf = re.sub(r'E[+0#?]+', 'E', nf) comma = '' pound = '' c_ndx = nf.find(',') d_ndx = nf.find('.') p_ndx = nf.find('#') if c_ndx >= 0: if d_ndx >=0 and c_ndx > d_ndx: while c_ndx < len(nf): value /= 100 c_ndx = nf.find(',', c_ndx+1) else: comma = ',' places = '' if d_ndx >= 0: if p_ndx > d_ndx: pound = '#' nf = nf.replace('#', '0') places = f'.{nf[d_ndx+1:].count("0")}' if fmt == 'd': value = float(value) fmt = 'f' elif fmt == 'd': zeros = nf.count('0') if zeros: fmt = f'0{zeros}' + fmt else: places = '.0' nf = re.sub(r'_.', ' ', nf) nf = nf.replace('*', '') # We can't really do this one m = re.match(r'((?:[^0#.E%,?*]*{\d+}[^0#.E%,?*]*)|(?:[^0#.E%,?*]*))[0#.E%,?*]+(.*[/][0-9?#]+)?(.*)$', nf) prefix = restore_escapes(m.group(1)) fraction = m.group(2) suffix = restore_escapes(m.group(3)) if fraction: s_ndx = fraction.find('/') suf = restore_escapes(fraction[:s_ndx]).replace('?', '').replace('#', '').replace('0', '') fraction = fraction[s_ndx+1:] if fraction.isdigit(): ival = int(value) value -= ival suf += f'{value//int(fraction)}/{fraction}' value = ival if fraction[0] != '?' or float(int(value)) != value: ival = int(value) value -= ival fr = Fraction.from_float(value).limit_denominator(10**(len(fraction))-1) suf += f'{fr.numerator}/{fr.denominator}' suffix = suf + suffix py_format = f'{prefix}{{0:{pound}{comma}{places}{fmt}}}{suffix}' value = py_format.format(value) if TRACE: print(f'perform_number_format: using {py_format} to produce {value}') return value number_format = number_format.split(';')[0] if isinstance(value, tm): value = datetime(1, 1, 1, value.hour, value.minute, value.second) elif isinstance(value, date) and not isinstance(value, datetime): value = datetime(value.year, value.month, value.day) if isinstance(value, datetime) and \ ('[h' in number_format or '[m' in number_format or '[s' in number_format): value = timedelta(hours=value.hour, minutes=value.minute, seconds=value.second + value.microsecond / 1000000.0) if isinstance(value, timedelta): total_seconds = int(value.total_seconds()) hours = total_seconds // 3600 total_minutes = total_seconds // 60 minutes = (total_seconds % 3600) // 60 seconds = total_seconds % 60 nf = grab_escapes(number_format) nf = nf.replace('[hh]', f'{hours:02d}').replace('[mm]', f'{total_minutes:02d}'). \ replace('[ss]', f'{total_seconds:02d}').replace('[h]', str(hours)). \ replace('[m]', str(total_minutes)).replace('[s]', str(total_seconds)). \ replace('mm', f'{minutes:02d}').replace('ss', f'{seconds:02d}'). \ replace('m', str(minutes)).replace('s', str(seconds)) nf = restore_escapes(nf) value = nf if TRACE: print(f'perform_number_format: timedelta produced {value} (grabit = {grabit})') if isinstance(value, datetime): if value.microsecond >= 500000: # Round up 999999 ms to the next second value = value.replace(microsecond=0) + timedelta(seconds=1) fmt = grab_escapes(number_format) fmt = fmt.replace('yyyy', '%Y').replace('yy', '%y').replace('dddd', '%A').replace('ddd', '%a'). \ replace('dd', '%D').replace('mmmm', '%B').replace('mmm', '%b').replace('AM/PM', '%p'). \ replace('ss', '%S') h_ndx = fmt.find('h') if '%p' in fmt: fmt = fmt.replace('hh', '%I') else: fmt = fmt.replace('hh', '%H') # Now let's handle the hard ones: mm, m, d, h, a/p ap_ndx = fmt.find('a/p') if ap_ndx >= 0: fmt = fmt.replace('a/p', '%p') while True: m_ndx = fmt.find('mm') if m_ndx < 0: break if h_ndx >= 0 and m_ndx > h_ndx: # it's minutes fmt = fmt[:m_ndx] + '%M' + fmt[m_ndx+2:] continue fmt = fmt[:m_ndx] + '%X' + fmt[m_ndx+2:] # it's months (corrected below) while True: m_ndx = fmt.find('m') if m_ndx < 0: break if h_ndx >= 0 and m_ndx > h_ndx: # it's minutes fmt = fmt[:m_ndx] + str(value.minute) + fmt[m_ndx+1:] continue fmt = fmt[:m_ndx] + str(value.month) + fmt[m_ndx+1:] # it's months d_ndx = fmt.find('d') if d_ndx >= 0: fmt = fmt.replace('d', str(value.day)) if h_ndx >= 0: if '%p' in fmt: hour = value.hour if hour > 12: hour -= 12 if hour == 0: hour = 12 fmt = fmt.replace('h', str(hour)) else: fmt = fmt.replace('h', str(value.hour)) fmt = fmt.replace('%D', '%d').replace('%X', '%m') fmt = restore_escapes(fmt) value = value.strftime(fmt) if ap_ndx >= 0: value = value.replace('AM', 'a').replace('PM', 'p') if TRACE: print(f'perform_number_format: using {fmt} to produce {value} (grabit={grabit})') return value