• R/O
  • SSH

execsql: Commit

Default repository for execsql.py


Commit MetaInfo

Révision: 2365cbb42012af96df794c314fcbaf2e7bb66cf9 (tree)
l'heure: 2022-06-19 02:49:01
Auteur: rdnielsen
Committer: rdnielsen

Message de Log

Added import from Parquet data files.

Change Summary

Modification

diff -r c36972c74ceb -r 2365cbb42012 CHANGELOG.rst
--- a/CHANGELOG.rst Sat May 28 15:40:16 2022 -0700
+++ b/CHANGELOG.rst Sat Jun 18 10:49:01 2022 -0700
@@ -1,6 +1,7 @@
11 ========== ========== =================================================================================
22 Version Date Features
33 ========== ========== =================================================================================
4+1.101.0 2022-06-18 Added import from data files in Parquet format.
45 1.100.3 2022-04-30 Modified the PROMPT ENTRY_FORM so that the 'Enter' key does not close the form when a checkbox has the focus.
56 1.100.1 2022-02-22 Added a bottom border to the header row and top-alignment of body cells to ODS export.
67 1.100.0 2022-02-20 Added the INITIALLY clause to the PROMPT ENTER_SUB metacommand.
diff -r c36972c74ceb -r 2365cbb42012 doc/source/conf.py
--- a/doc/source/conf.py Sat May 28 15:40:16 2022 -0700
+++ b/doc/source/conf.py Sat Jun 18 10:49:01 2022 -0700
@@ -58,9 +58,9 @@
5858 # built documents.
5959 #
6060 # The short X.Y version.
61-version = u'1.100'
61+version = u'1.101'
6262 # The full version, including alpha/beta/rc tags.
63-release = u'1.100'
63+release = u'1.101'
6464
6565 # A string of reStructuredText that will be included at the beginning of
6666 # every source file that is read.
diff -r c36972c74ceb -r 2365cbb42012 doc/source/metacommands.rst
--- a/doc/source/metacommands.rst Sat May 28 15:40:16 2022 -0700
+++ b/doc/source/metacommands.rst Sat Jun 18 10:49:01 2022 -0700
@@ -2953,6 +2953,11 @@
29532953 FROM EXCEL <file_name> SHEETS MATCHING <regular_expression>
29542954 [SKIP <rows>] [ENCODING <encoding>]
29552955
2956+The syntax for importing data from a data file in
2957+`Parquet <https://parquet.apache.org/>`_ format is::
2958+
2959+ IMPORT TO [NEW|REPLACEMENT] <table_name> FROM PARQUET <file_name>
2960+
29562961
29572962 Column names in the input must be valid for the DBMS in use.
29582963
diff -r c36972c74ceb -r 2365cbb42012 doc/source/requirements.rst
--- a/doc/source/requirements.rst Sat May 28 15:40:16 2022 -0700
+++ b/doc/source/requirements.rst Sat Jun 18 10:49:01 2022 -0700
@@ -56,6 +56,12 @@
5656 processors with the :ref:`EXPORT <export>` metacommand,
5757 those Python packages must be installed also.
5858
59+If data are to be :ref:`imported <import>` from the
60+`Parquet <https://parquet.apache.org/>`_ file format,
61+the `pandas <https://pypi.org/project/pandas/>`_ library and
62+either the *pyarrow* or *fastparquet* Python packages
63+must also be installed.
64+
5965 If data are to be :ref:`exported <export>` to the feather file format,
6066 the `pandas <https://pypi.org/project/pandas/>`_ and
6167 `feather-format <https://pypi.org/project/feather-format/>`_
diff -r c36972c74ceb -r 2365cbb42012 execsql/execsql.py
--- a/execsql/execsql.py Sat May 28 15:40:16 2022 -0700
+++ b/execsql/execsql.py Sat Jun 18 10:49:01 2022 -0700
@@ -4,7 +4,7 @@
44 #
55 # PURPOSE
66 # Read a sequence of SQL statements from a file and execute them against a PostgreSQL,
7-# MS-Access, SQLite, SQL Server, MySQL, Firebird, or Orcacle database, or a DSN, and
7+# MS-Access, SQLite, SQL Server, MySQL, Firebird, or Oracle database, or a DSN, and
88 # supplement the SQL statements with metacommands that allow import and export of data,
99 # and conditional execution of parts of the script. This program provides a standard tool
1010 # for execution of SQL scripts with DBMSs that have varying--or no--capabilities for
@@ -27,12 +27,12 @@
2727 #
2828 # ===============================================================================
2929
30-__version__ = "1.100.3"
31-__vdate = "2022-05-28"
30+__version__ = "1.101.0"
31+__vdate = "2022-06-18"
3232
3333 primary_vno = 1
34-secondary_vno = 100
35-tertiary_vno = 4
34+secondary_vno = 101
35+tertiary_vno = 0
3636
3737 import os
3838 import os.path
@@ -8550,6 +8550,36 @@
85508550 metacommandlist.add(ins_fn_rxs(r'^\s*DEBUG\s+WRITE\s+METACOMMANDLIST\s+TO\s+', r'\s*$'), x_debug_write_metacommands)
85518551
85528552
8553+#**** IMPORT FROM PARQUET
8554+def x_import_parquet(**kwargs):
8555+ # is_new should have values of 0, 1, or 2
8556+ newstr = kwargs['new']
8557+ if newstr:
8558+ is_new = 1 + ['new', 'replacement'].index(newstr.lower())
8559+ else:
8560+ is_new = 0
8561+ schemaname = kwargs['schema']
8562+ tablename = kwargs['table']
8563+ filename = kwargs['filename']
8564+ if len(filename) > 1 and filename[0] == "~" and filename[1] == os.sep:
8565+ filename = os.path.join(os.path.expanduser(r'~'), filename[2:])
8566+ if not os.path.exists(filename):
8567+ raise ErrInfo(type="cmd", command_text=kwargs['metacommandline'], other_msg='Input file %s does not exist' % filename)
8568+ sz, dt = file_size_date(filename)
8569+ exec_log.log_status_info(u"IMPORTing from Parquet file %s (%s, %s)" % (filename, sz, dt))
8570+ try:
8571+ import_parquet(dbs.current(), schemaname, tablename, filename, is_new)
8572+ except ErrInfo:
8573+ raise
8574+ except:
8575+ raise ErrInfo("exception", exception_msg=exception_desc(), other_msg="Can't import data from Parquet data file %s" % filename)
8576+ return None
8577+
8578+metacommandlist.add(
8579+ ins_table_rxs(r'^\s*IMPORT\s+TO\s+(?:(?P<new>NEW|REPLACEMENT)\s+)?', ins_fn_rxs(r'\s+FROM\s+PARQUET\s+', r'\s*$')),
8580+ x_import_parquet)
8581+
8582+
85538583 #**** DEBUG WRITE ODBC_DRIVERS
85548584 def x_debug_write_odbc_drivers(**kwargs):
85558585 try:
@@ -13795,6 +13825,16 @@
1379513825 hdrs, data = xls_data(filename, sheetname, junk_header_rows, encoding)
1379613826 import_data_table(db, schemaname, tablename, is_new, hdrs, data)
1379713827
13828+def import_parquet(db, schemaname, tablename, filename, is_new):
13829+ try:
13830+ import pandas as pd
13831+ except:
13832+ raise ErrInfo("exception", exception_msg=exception_desc(), other_msg=u"The pandas and fastparquet or pyarrow Python libraries must be installed to import data from the Parquet format.")
13833+ df = pd.read_parquet(filename)
13834+ hdrs = df.columns.values.tolist()
13835+ data = df.values.tolist()
13836+ import_data_table(db, schemaname, tablename, is_new, hdrs, data)
13837+
1379813838
1379913839 def importtable(db, schemaname, tablename, filename, is_new, skip_header_line=True, quotechar=None, delimchar=None, encoding=None, junk_header_lines=0):
1380013840 global conf
diff -r c36972c74ceb -r 2365cbb42012 setup.py
--- a/setup.py Sat May 28 15:40:16 2022 -0700
+++ b/setup.py Sat Jun 18 10:49:01 2022 -0700
@@ -5,7 +5,7 @@
55 long_description = f.read()
66
77 setuptools.setup(name='execsql',
8- version='1.100.4',
8+ version='1.101.0',
99 description="Runs a SQL script against a PostgreSQL, MS-Access, SQLite, MS-SQL-Server, MySQL, MariaDB, Firebird, or Oracle database, or an ODBC DSN. Provides metacommands to import and export data, copy data between databases, conditionally execute SQL and metacommands, and dynamically alter SQL and metacommands with substitution variables. Data can be exported in 18 different formats, including CSV, TSV, ODS, HTML, JSON, LaTeX, and Markdown tables, and using custom templates.",
1010 author='Dreas Nielsen',
1111 author_email='dreas.nielsen@gmail.com',
@@ -40,7 +40,7 @@
4040 'xlrd', 'psycopg2', 'pyodbc', 'pymysql', 'fdb', 'cx_Oracle', 'cx-Oracle',
4141 'odfpy', 'ETL', 'CSV', 'TSV', 'XML', 'HTML', 'JSON', 'Feather', 'LaTeX', 'OpenDocument',
4242 'table', 'DBMS', 'Redshift', 'CockroachDB', 'query', 'script', 'import', 'export',
43- 'template', 'Jinja', 'Airspeed', 'zip'],
43+ 'Parquet', 'template', 'Jinja', 'Airspeed', 'zip'],
4444 long_description_content_type="text/markdown",
4545 long_description=long_description
4646 )
Afficher sur ancien navigateur de dépôt.