• R/O
  • SSH

execsql: Commit

Default repository for execsql.py


Commit MetaInfo

Révision00e8e657948b74c215e0d67279bc516eacaebc03 (tree)
l'heure2022-02-20 08:52:32
Auteurrdnielsen
Commiterrdnielsen

Message de Log

Added variant IMPORT metacommands that will import multiple worksheets at once from OpenDocument and Excel workbooks.

Change Summary

Modification

diff -r fc62f2357e9a -r 00e8e657948b CHANGELOG.rst
--- a/CHANGELOG.rst Wed Jan 12 07:42:22 2022 -0800
+++ b/CHANGELOG.rst Sat Feb 19 15:52:32 2022 -0800
@@ -1,6 +1,7 @@
11 ========== ========== =================================================================================
22 Version Date Features
33 ========== ========== =================================================================================
4+1.99.0 2022-02-19 Added the variant IMPORT metacommands that use a SHEETS MATCHING <regex> clause to import multiple sheets from an OpenDocument or Excel workbook in one step.
45 1.98.0 2022-01-12 Added the FOLD_COLUMN_HEADERS configuration setting. Cleaning column headers now adds an underscore to the beginning of any column header that starts with a digit.
56 1.97.0 2022-01-08 Added the CONTAINS, ENDS_WITH, and STARTS_WITH conditional tests. Modified the 'textarea' control in an ENTRY_FORM to allow newlines to be inserted, and to strip trailing newlines. Modified the SQL statement evaluator to ignore multiple terminating semicolons.
67 1.96.0 2022-01-03 Reading of .xlsx files now uses the openpyxl library--a new requirement.
diff -r fc62f2357e9a -r 00e8e657948b doc/source/conf.py
--- a/doc/source/conf.py Wed Jan 12 07:42:22 2022 -0800
+++ b/doc/source/conf.py Sat Feb 19 15:52:32 2022 -0800
@@ -58,9 +58,9 @@
5858 # built documents.
5959 #
6060 # The short X.Y version.
61-version = u'1.98'
61+version = u'1.99'
6262 # The full version, including alpha/beta/rc tags.
63-release = u'1.98'
63+release = u'1.99'
6464
6565 # A string of reStructuredText that will be included at the beginning of
6666 # every source file that is read.
diff -r fc62f2357e9a -r 00e8e657948b doc/source/metacommands.rst
--- a/doc/source/metacommands.rst Wed Jan 12 07:42:22 2022 -0800
+++ b/doc/source/metacommands.rst Sat Feb 19 15:52:32 2022 -0800
@@ -2045,7 +2045,7 @@
20452045
20462046 The columns in the metadata table that is exported are:
20472047
2048-* ``query``: The name of the table or query, or the select statement, that was th
2048+* ``query``: The name of the table or query, or the select statement, that was the
20492049 source of data.
20502050
20512051 * ``filename``: The name of the datafile that was created.
@@ -2924,16 +2924,30 @@
29242924 [WITH [QUOTE <quote_char> DELIMITER <delim_char>]
29252925 [ENCODING <encoding>]] [SKIP <lines>]
29262926
2927-The syntax for importing data from an OpenDocument spreadsheet is::
2927+The syntax for importing data from a single OpenDocument spreadsheet is::
29282928
29292929 IMPORT TO [NEW|REPLACEMENT] <table_name> FROM <file_name>
29302930 SHEET <sheet_name> [SKIP <rows>]
29312931
2932-The syntax for importing data from an Excel spreadsheet is::
2932+The syntax for importing data from multiple sheets within an OpenDocument
2933+workbook is::
2934+
2935+ IMPORT TO [NEW|REPLACEMENT] TABLES IN [SCHEMA] <schema_name>
2936+ FROM <file_name> SHEETS MATCHING <regular_expression> [SKIP <rows>]
2937+
2938+The syntax for importing data from a single Excel spreadsheet is::
29332939
29342940 IMPORT TO [NEW|REPLACEMENT] <table_name> FROM EXCEL <file_name>
29352941 SHEET <sheet_name> [SKIP <rows>] [ENCODING <encoding>]
29362942
2943+The syntax for importing data from multiple sheets within an Excel
2944+workbook is::
2945+
2946+ IMPORT TO [NEW|REPLACEMENT] TABLES IN [SCHEMA] <schema_name>
2947+ FROM EXCEL <file_name> SHEETS MATCHING <regular_expression>
2948+ [SKIP <rows>] [ENCODING <encoding>]
2949+
2950+
29372951 Column names in the input must be valid for the DBMS in use.
29382952
29392953 If the "WITH QUOTE <quote_char> DELIMITER <delim_char>" clause is not
diff -r fc62f2357e9a -r 00e8e657948b execsql/execsql.py
--- a/execsql/execsql.py Wed Jan 12 07:42:22 2022 -0800
+++ b/execsql/execsql.py Sat Feb 19 15:52:32 2022 -0800
@@ -27,11 +27,11 @@
2727 #
2828 # ===============================================================================
2929
30-__version__ = "1.98.0"
31-__vdate = "2022-01-12"
30+__version__ = "1.99.0"
31+__vdate = "2022-02-19"
3232
3333 primary_vno = 1
34-secondary_vno = 98
34+secondary_vno = 99
3535 tertiary_vno = 0
3636
3737 import os
@@ -728,6 +728,15 @@
728728 def ins_quoted_rx(fragment1, fragment2, rx):
729729 return ins_rxs((rx, r'"%s"' % rx), fragment1, fragment2)
730730
731+def ins_schema_rxs(fragment1, fragment2, suffix=None):
732+ schema_exprs = (r'"(?P<schema>[A-Za-z0-9_\- ]+)"',
733+ r'(?P<schema>[A-Za-z0-9_\-]+)',
734+ r'\[(?P<schema>[A-Za-z0-9_\- ]+)\]'
735+ )
736+ if suffix:
737+ schema_exprs = tuple([s.replace("schema", "schema"+suffix) for s in schema_exprs])
738+ return ins_rxs(schema_exprs, fragment1, fragment2)
739+
731740 def ins_table_rxs(fragment1, fragment2, suffix=None):
732741 tbl_exprs = (r'(?:"(?P<schema>[A-Za-z0-9_\- ]+)"\.)?"(?P<table>[A-Za-z0-9_\-\# ]+)"',
733742 r'(?:(?P<schema>[A-Za-z0-9_\-]+)\.)?(?P<table>[A-Za-z0-9_\-\#]+)',
@@ -1585,6 +1594,8 @@
15851594 self.datemode = self.wbk.datemode
15861595 else:
15871596 raise XlsFileError("There is no Excel file %s." % self.filename)
1597+ def sheetnames(self):
1598+ return self.wbk.sheets()
15881599 def sheet_named(self, sheetname):
15891600 # Return the sheet with the matching name. If the name is actually an integer,
15901601 # return that sheet number.
@@ -1706,6 +1717,8 @@
17061717 self.wbk = openpyxl.load_workbook(filename)
17071718 else:
17081719 raise XlsxFileError("There is no Excel file %s." % self.filename)
1720+ def sheetnames(self):
1721+ return self.wbk.sheetnames
17091722 def sheet_named(self, sheetname):
17101723 # Return the sheet with the matching name. If the name is actually an integer,
17111724 # return that sheet number.
@@ -8824,6 +8837,52 @@
88248837 x_import_file)
88258838
88268839
8840+#**** IMPORT_ODS_PATTERN
8841+def x_import_ods_pattern(**kwargs):
8842+ newstr = kwargs['new']
8843+ if newstr:
8844+ is_new = 1 + ['new', 'replacement'].index(newstr.lower())
8845+ else:
8846+ is_new = 0
8847+ schemaname = kwargs['schema']
8848+ filename = kwargs['filename']
8849+ rx = re.compile(kwargs['patn'], re.I)
8850+ hdr_rows = kwargs['skip']
8851+ if not hdr_rows:
8852+ hdr_rows = 0
8853+ else:
8854+ hdr_rows = int(hdr_rows)
8855+ if not os.path.exists(filename):
8856+ raise ErrInfo(type="cmd", command_text=kwargs['metacommandline'], other_msg='Input file does not exist')
8857+ wbk = OdsFile()
8858+ try:
8859+ wbk.open(filename)
8860+ except:
8861+ raise ErrInfo(type="cmd", other_msg="%s is not a valid OpenDocument spreadsheet." % filename)
8862+ sheets = wbk.sheetnames()
8863+ impsheets = [s for s in sheets if rx.search(s)]
8864+ tables = impsheets
8865+ if conf.clean_col_hdrs:
8866+ tables = clean_words(tables)
8867+ if conf.fold_col_hdrs != 'no':
8868+ tables = fold_words(tables, conf.fold_col_hdrs)
8869+ for ix in range(len(impsheets)):
8870+ sheetname = impsheets[ix]
8871+ tablename = tables[ix]
8872+ try:
8873+ importods(dbs.current(), schemaname, tablename, is_new, filename, sheetname, hdr_rows)
8874+ except ErrInfo:
8875+ raise
8876+ except:
8877+ raise ErrInfo("exception", exception_msg=exception_desc(), other_msg="Can't import data from ODS file %s" % filename)
8878+ return None
8879+
8880+metacommandlist.add(
8881+ ins_schema_rxs(r'\s*IMPORT\s+TO\s+(?:(?P<new>NEW|REPLACEMENT)\s+)?TABLES\s+IN\s+(?:SCHEMA\s+)?', ins_fn_rxs(r'\s+FROM\s+', r'\s+SHEETS\s+MATCHING\s+(?P<patn>\S+)(?:\s+SKIP\s+(?P<skip>\d+))?\s*?')),
8882+ x_import_ods_pattern)
8883+
8884+
8885+
88278886 #**** IMPORT ODS
88288887 def x_import_ods(**kwargs):
88298888 # is_new should have values of 0, 1, or 2
@@ -8857,6 +8916,61 @@
88578916 x_import_ods)
88588917
88598918
8919+#**** IMPORT_XLS_PATTERN
8920+def x_import_xls_pattern(**kwargs):
8921+ newstr = kwargs['new']
8922+ if newstr:
8923+ is_new = 1 + ['new', 'replacement'].index(newstr.lower())
8924+ else:
8925+ is_new = 0
8926+ schemaname = kwargs['schema']
8927+ filename = kwargs['filename']
8928+ rx = re.compile(kwargs['patn'], re.I)
8929+ hdr_rows = kwargs['skip']
8930+ encoding = kwargs['encoding']
8931+ if not hdr_rows:
8932+ hdr_rows = 0
8933+ else:
8934+ hdr_rows = int(hdr_rows)
8935+ if not os.path.exists(filename):
8936+ raise ErrInfo(type="cmd", command_text=kwargs['metacommandline'], other_msg='Input file does not exist')
8937+ if len(filename) < 4:
8938+ raise ErrInfo(type="cmd", other_msg="%s is not a recognizable Excel spreadsheet name." % filename)
8939+ ext3 = filename[-3:].lower()
8940+ if ext3 == "xls":
8941+ wbk = XlsFile()
8942+ elif ext3 == 'lsx':
8943+ wbk = XlsxFile()
8944+ else:
8945+ raise ErrInfo(type="cmd", other_msg="%s is not a recognizable Excel spreadsheet name." % filename)
8946+ try:
8947+ wbk.open(filename, encoding)
8948+ except:
8949+ raise ErrInfo(type="cmd", other_msg="%s is not a valid Excel spreadsheet." % filename)
8950+ sheets = wbk.sheetnames()
8951+ impsheets = [s for s in sheets if rx.search(s)]
8952+ tables = impsheets
8953+ if conf.clean_col_hdrs:
8954+ tables = clean_words(tables)
8955+ if conf.fold_col_hdrs != 'no':
8956+ tables = fold_words(tables, conf.fold_col_hdrs)
8957+ for ix in range(len(impsheets)):
8958+ sheetname = impsheets[ix]
8959+ tablename = tables[ix]
8960+ try:
8961+ importxls(dbs.current(), schemaname, tablename, is_new, filename, sheetname, hdr_rows, encoding)
8962+ except ErrInfo:
8963+ raise
8964+ except:
8965+ raise ErrInfo("exception", exception_msg=exception_desc(), other_msg="Can't import data from ODS file %s" % filename)
8966+ return None
8967+
8968+metacommandlist.add(
8969+ ins_schema_rxs(r'\s*IMPORT\s+TO\s+(?:(?P<new>NEW|REPLACEMENT)\s+)?TABLES\s+IN\s+(?:SCHEMA\s+)?', ins_fn_rxs(r'\s+FROM\s+EXCEL\s+', r'\s+SHEETS\s+MATCHING\s+(?P<patn>\S+)(?:\s+SKIP\s+(?P<skip>\d+))?(?:\s+ENCODING\s+(?P<encoding>\w+))?\s*?')),
8970+ x_import_xls_pattern)
8971+
8972+
8973+
88608974 #**** IMPORT XLS
88618975 def x_import_xls(**kwargs):
88628976 # is_new should have values of 0, 1, or 2
@@ -13631,7 +13745,7 @@
1363113745 else:
1363213746 src_extra_cols = [col for col in hdrs if col.lower() not in [tc.lower() for tc in table_cols]]
1363313747 if len(src_extra_cols) > 0:
13634- raise ErrInfo(type="error", other_msg=u"The input data table has the following columns that are not in table %s: %s." % (table_name, ", ".join(src_extra_cols)))
13748+ raise ErrInfo(type="error", other_msg=u"The input data table has the following columns that are not in table %s: %s." % (tablename, ", ".join(src_extra_cols)))
1363513749 import_cols = hdrs
1363613750 try:
1363713751 db.populate_table(schemaname, tablename, data, import_cols, get_ts)
Afficher sur ancien navigateur de dépôt.