In my project i use oracle as primary database and i\'ve faced a problem with parsing clob. So suppose we have a clob with value
aaaaaa
cccccc
bbbbb
A pipelined function with some additional options to drive the behavior. Tested/works on Windows, Oracle 11g (I have some suspicions it may fail in *nix environments because of how lines terminate).
CREATE OR REPLACE FUNCTION ETL_HELPER_PARSE
(P_CLOB NCLOB, P_LINES_TO_SKIP INT DEFAULT 0, P_PUT_EMPTY_LINES CHAR DEFAULT 'N') RETURN SYS.ODCIVarchar2List PIPELINED
AS
c_top_lines_to_skip CONSTANT NUMBER NOT NULL := P_LINES_TO_SKIP;
c_output_empty_lines CONSTANT CHAR(1) NOT NULL := P_PUT_EMPTY_LINES;
--
l_len INT := DBMS_LOB.GETLENGTH(P_CLOB);
l_hit INT := 0;
l_offset INT := 1;
l_amount INT;
l_buffer VARCHAR2(32767);
l_cnt INT := 1;
BEGIN
WHILE ( l_offset < l_len )
LOOP
l_hit := DBMS_LOB.INSTR (
lob_loc => P_CLOB -- IN CLOB CHARACTER SET ANY_CS
,pattern => CHR(13)||CHR(10) -- IN VARCHAR2 CHARACTER SET lob_loc%CHARSET
,offset => l_offset -- IN INTEGER := 1
,nth => 1 -- IN INTEGER := 1
);
l_amount := CASE WHEN COALESCE(l_hit, 0) > 0 THEN l_hit - l_offset ELSE l_len - l_offset + 1 END;
-- `l_amount=0` means a new empty line has been encountered
IF l_cnt > c_top_lines_to_skip
THEN
IF l_amount > 0
THEN
DBMS_LOB.READ(P_CLOB, l_amount, l_offset, l_buffer);
PIPE ROW (l_buffer);
ELSIF UPPER(c_output_empty_lines) = 'Y'
THEN
PIPE ROW ('');
END IF;
END IF;
l_offset := CASE WHEN COALESCE(l_hit, 0) > 0 THEN l_hit + 2 ELSE l_len END;
l_cnt := l_cnt + 1;
end loop;
EXCEPTION
WHEN OTHERS THEN
DBMS_OUTPUT.PUT_LINE('Error : '||SQLERRM);
END ETL_HELPER_PARSE;
Here is a piece of code that works. I suggest that you use explicit cursors instead of implicit ones (FOR i IN (select...)), for performance purpose.
First here is the script to create testcase.
create table test (c clob);
insert into test (c) values (
'azertyuiop
qsdfghjklm
wxcvbn
');
Then here is the script to read line by line Clob :
/* Formatted on 28/08/2012 14:16:52 (QP5 v5.115.810.9015) */
declare
nStartIndex number := 1;
nEndIndex number := 1;
nLineIndex number := 0;
vLine varchar2(2000);
cursor c_clob is
select c from test;
c clob;
-------------------------------
procedure printout
(p_clob in out nocopy clob) is
offset number := 1;
amount number := 32767;
len number := dbms_lob.getlength(p_clob);
lc_buffer varchar2(32767);
i pls_integer := 1;
begin
if ( dbms_lob.isopen(p_clob) != 1 ) then
dbms_lob.open(p_clob, 0);
end if;
amount := instr(p_clob, chr(10), offset);
while ( offset < len )
loop
dbms_lob.read(p_clob, amount, offset, lc_buffer);
dbms_output.put_line('Line #'||i||':'||lc_buffer);
offset := offset + amount;
i := i + 1;
end loop;
if ( dbms_lob.isopen(p_clob) = 1 ) then
dbms_lob.close(p_clob);
end if;
exception
when others then
dbms_output.put_line('Error : '||sqlerrm);
end printout;
---------------------------
begin
dbms_output.put_line('-----------');
open c_clob;
loop
fetch c_clob into c;
exit when c_clob%notfound;
printout(c);
end loop;
close c_clob;
end;
'amount' variable is used to detect end of line position. Be carfull, in some case the end of line is CHR(10)||CHR(13) (CR + LF), and in some other cases it is only CHR(10).
declare
c_clob clob := empty_clob();
c_offset number;
c_len number;
read_cnt number;
prev_buf number := 0;
read_str varchar2(32000);
BEGIN
-- Read the clob in to the local variable
select c into c_clob from test;
c_offset := 1;
-- Get the length of the clob
c_len := dbms_lob.getlength(c_clob);
-- Read till the current offset is less the length of clob
while(c_offset <= c_len)
loop
-- Get the index of the next new line character
read_cnt := instr(c_clob, CHR(10), c_offset, 1);
exit when read_cnt = 0;
-- Read the clob in the index
read_str := dbms_lob.substr(c_clob, read_cnt-c_offset, c_offset);
dbms_output.put_line('Line#' || read_str);
-- Now the current offset should point after the read line
c_offset := read_cnt+1;
end loop;
END;
/