Source code for openpyxl.worksheet.read_only

from __future__ import absolute_import
# Copyright (c) 2010-2017 openpyxl

""" Read worksheets on-demand
"""

# compatibility
from openpyxl.compat import (
    range,
    deprecated
)

# package
from openpyxl.cell.text import Text

from openpyxl.xml.functions import iterparse, safe_iterator
from openpyxl.xml.constants import SHEET_MAIN_NS

from openpyxl.worksheet import Worksheet
from openpyxl.utils import (
    column_index_from_string,
    get_column_letter,
    coordinate_to_tuple,
)
from openpyxl.worksheet.dimensions import SheetDimension
from openpyxl.cell.read_only import ReadOnlyCell, EMPTY_CELL


[docs]def read_dimension(source): if hasattr(source, "encode"): return min_row = min_col = max_row = max_col = None DIMENSION_TAG = '{%s}dimension' % SHEET_MAIN_NS DATA_TAG = '{%s}sheetData' % SHEET_MAIN_NS it = iterparse(source, tag=[DIMENSION_TAG, DATA_TAG]) for _event, element in it: if element.tag == DIMENSION_TAG: dim = SheetDimension.from_tree(element) return dim.boundaries elif element.tag == DATA_TAG: # Dimensions missing break element.clear()
ROW_TAG = '{%s}row' % SHEET_MAIN_NS CELL_TAG = '{%s}c' % SHEET_MAIN_NS VALUE_TAG = '{%s}v' % SHEET_MAIN_NS FORMULA_TAG = '{%s}f' % SHEET_MAIN_NS INLINE_TAG = '{%s}is' % SHEET_MAIN_NS DIMENSION_TAG = '{%s}dimension' % SHEET_MAIN_NS
[docs]class ReadOnlyWorksheet(object): _xml = None _min_column = 1 _min_row = 1 _max_column = _max_row = None def __init__(self, parent_workbook, title, worksheet_path, xml_source, shared_strings): self.parent = parent_workbook self.title = title self._current_row = None self.worksheet_path = worksheet_path self.shared_strings = shared_strings self.base_date = parent_workbook.excel_base_date self.xml_source = xml_source dimensions = read_dimension(self.xml_source) if dimensions is not None: self.min_column, self.min_row, self.max_column, self.max_row = dimensions # Methods from Worksheet self.cell = Worksheet.cell.__get__(self) self.iter_rows = Worksheet.iter_rows.__get__(self) def __getitem__(self, key): # use protected method from Worksheet meth = Worksheet.__getitem__.__get__(self) return meth(key) @property def xml_source(self): """Parse xml source on demand, default to Excel archive""" if self._xml is None: return self.parent._archive.open(self.worksheet_path) return self._xml @xml_source.setter def xml_source(self, value): self._xml = value
[docs] @deprecated("Use ws.iter_rows()") def get_squared_range(self, min_col, min_row, max_col, max_row): return self._cells_by_row(min_col, min_row, max_col, max_row)
def _cells_by_row(self, min_col, min_row, max_col, max_row): """ The source worksheet file may have columns or rows missing. Missing cells will be created. """ if max_col is not None: empty_row = tuple(EMPTY_CELL for column in range(min_col, max_col + 1)) else: empty_row = [] row_counter = min_row p = iterparse(self.xml_source, tag=[ROW_TAG], remove_blank_text=True) for _event, element in p: if element.tag == ROW_TAG: row_id = int(element.get("r", row_counter)) # got all the rows we need if max_row is not None and row_id > max_row: break # some rows are missing for row_counter in range(row_counter, row_id): row_counter += 1 yield empty_row # return cells from a row if min_row <= row_id: yield tuple(self._get_row(element, min_col, max_col, row_counter=row_counter)) row_counter += 1 element.clear() def _get_row(self, element, min_col=1, max_col=None, row_counter=None): """Return cells from a particular row""" col_counter = min_col data_only = getattr(self.parent, 'data_only', False) for cell in safe_iterator(element, CELL_TAG): coordinate = cell.get('r') if coordinate: row, column = coordinate_to_tuple(coordinate) else: row, column = row_counter, col_counter if max_col is not None and column > max_col: break if min_col <= column: if col_counter < column: for col_counter in range(max(col_counter, min_col), column): # pad row with missing cells yield EMPTY_CELL data_type = cell.get('t', 'n') style_id = int(cell.get('s', 0)) value = None formula = cell.findtext(FORMULA_TAG) if formula is not None and not data_only: data_type = 'f' value = "=%s" % formula elif data_type == 'inlineStr': child = cell.find(INLINE_TAG) if child is not None: richtext = Text.from_tree(child) value = richtext.content else: value = cell.findtext(VALUE_TAG) or None yield ReadOnlyCell(self, row, column, value, data_type, style_id) col_counter = column + 1 if max_col is not None: for _ in range(max(min_col, col_counter), max_col+1): yield EMPTY_CELL def _get_cell(self, row, column): """Cells are returned by a generator which can be empty""" for row in self.get_squared_range(column, row, column, row): if row: return row[0] return EMPTY_CELL @property def rows(self): return self.iter_rows() def __iter__(self): return self.iter_rows()
[docs] def calculate_dimension(self, force=False): if not all([self.max_column, self.max_row]): if force: self._calculate_dimension() else: raise ValueError("Worksheet is unsized, use calculate_dimension(force=True)") return '%s%d:%s%d' % ( get_column_letter(self.min_column), self.min_row, get_column_letter(self.max_column), self.max_row )
def _calculate_dimension(self): """ Loop through all the cells to get the size of a worksheet. Do this only if it is explicitly requested. """ max_col = 0 for r in self.rows: if not r: continue cell = r[-1] max_col = max(max_col, cell.column) self.max_row = cell.row self.max_column = max_col @property def min_row(self): return self._min_row @min_row.setter def min_row(self, value): self._min_row = value @property def max_row(self): return self._max_row @max_row.setter def max_row(self, value): self._max_row = value @property def min_column(self): return self._min_column @min_column.setter def min_column(self, value): self._min_column = value @property def max_column(self): return self._max_column @max_column.setter def max_column(self, value): self._max_column = value