Preparers parse HTML operation examples v1.3
These examples use preparers with the ParseHtml operation in AI Accelerator.
Primitive
-- Default method is to structurally parse HTML to plaintext.
-- Only the HTML document is passed; no options argument is supplied.
SELECT *
FROM aidb.parse_html(
    '<html><body><h1>Hello World Heading</h1><p>Hello World paragraph</p></body></html>'
);
Output
      parse_html
-----------------------
 Hello World Heading  +
                      +
 Hello World paragraph+
(1 row)
-- Parse Hello World HTML to plaintext.
-- The method is named explicitly here; "StructuredPlaintext" is the default.
SELECT *
FROM aidb.parse_html(
    html => '<h1>Hello, world!</h1> <p>This is my first web page.</p> <p> It contains some <strong>bold text</strong>, some <em>italic test</em>, and a <a href="https://google.com" target="_blank">link</a>. </p> <img src="postgres_logo.png" alt="Postgres Logo Image"> <ol> <li>List item</li> <li>List item</li> <li>List item</li> </ol>',
    options => '{"method": "StructuredPlaintext"}' -- Default
);
Output
                        parse_html
-----------------------------------------------------------
 Hello, world!                                            +
                                                          +
 This is my first web page.                               +
                                                          +
 It contains some bold text, some italic test, and a link.+
                                                          +
 Postgres Logo Image                                      +
 List item                                                +
 List item                                                +
 List item                                                +
(1 row)
-- Parse Hello World HTML to markdown-esque text that retains some syntactical context
-- (headings, emphasis, links, and list numbering, as shown in the output below).
SELECT *
FROM aidb.parse_html(
    html => '<h1>Hello, world!</h1> <p>This is my first web page.</p> <p> It contains some <strong>bold text</strong>, some <em>italic test</em>, and a <a href="https://google.com" target="_blank">link</a>. </p> <img src="postgres_logo.png" alt="Postgres Logo Image"> <ol> <li>List item</li> <li>List item</li> <li>List item</li> </ol>',
    options => '{"method": "StructuredMarkdown"}'
);
Output
                                      parse_html
---------------------------------------------------------------------------------------
 # Hello, world!                                                                      +
                                                                                      +
 This is my first web page.                                                           +
                                                                                      +
 It contains some **bold text**, some *italic test*, and a [link](https://google.com).+
                                                                                      +
 ![Postgres Logo Image](postgres_logo.png)                                            +
 1. List item                                                                         +
 2. List item                                                                         +
 3. List item                                                                         +
(1 row)
Preparer with table data source
-- Create source test table
CREATE TABLE source_table__2772 (
    id INT GENERATED BY DEFAULT AS IDENTITY PRIMARY KEY,
    content TEXT NOT NULL
);

-- Load two HTML documents. The column list is explicit so the insert does not
-- depend on the table's column order.
INSERT INTO source_table__2772 (id, content)
VALUES
    (1, '<html><body><h1>Hello World Heading</h1><p>Hello World paragraph</p></body></html>'),
    (2, '<p>This is some <strong>bold text</strong>, some <em>italic test</em>, and a <a href="https://google.com" target="_blank">link</a>.');

-- Define a preparer that applies the ParseHtml operation to
-- source_table__2772.content and targets destination_table__2772.parsed_html,
-- with rows keyed by id on both sides.
SELECT aidb.create_table_preparer(
    name => 'preparer__2772',
    operation => 'ParseHtml',
    source_table => 'source_table__2772',
    source_data_column => 'content',
    destination_table => 'destination_table__2772',
    destination_data_column => 'parsed_html',
    source_key_column => 'id',
    destination_key_column => 'id',
    options => '{"method": "StructuredPlaintext"}'::JSONB -- Configuration for the ParseHtml operation
);

-- Run the preparer; the parsed rows are then read from the destination table.
SELECT aidb.bulk_data_preparation('preparer__2772');

-- Name the columns and order by key so the displayed output is deterministic.
SELECT
    id,
    parsed_html
FROM destination_table__2772
ORDER BY id;
Output
 id |                      parsed_html
----+-------------------------------------------------------
 1  | Hello World Heading                                  +
    |                                                      +
    | Hello World paragraph                                +
    |
 2  | This is some bold text, some italic test, and a link.+
    |
(2 rows)
- On this page
 - Primitive
 - Preparer with table data source