Skip to content

Commit 3db3824

Browse files
author
Artur Zakirov
committed
First commit
0 parents  commit 3db3824

File tree

6 files changed

+2881
-0
lines changed

6 files changed

+2881
-0
lines changed

Makefile

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# pg_tsparser/Makefile
#
# Build rules for the pg_tsparser extension. The heavy lifting is done by
# the PostgreSQL makefiles included at the bottom; this file only declares
# what has to be built, installed and tested.

# Extension identity and the files installed alongside it.
EXTENSION = pg_tsparser
DATA = pg_tsparser--1.0.sql
PGFILEDESC = "pg_tsparser - parser for text search"

# Shared library name and the objects linked into it.
MODULE_big = pg_tsparser
OBJS = tsparser.o $(WIN32RES)

# Regression test run through the REGRESS machinery
# (sql/pg_tsparser.sql vs. expected/pg_tsparser.out).
REGRESS = pg_tsparser

ifdef USE_PGXS
# USE_PGXS set: locate the PGXS makefile through pg_config and include it.
PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
else
# USE_PGXS unset: build as contrib/pg_tsparser inside a PostgreSQL tree.
subdir = contrib/pg_tsparser
top_builddir = ../..
include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif

expected/pg_tsparser.out

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
CREATE EXTENSION pg_tsparser;
2+
SELECT * FROM ts_token_type('tsparser');
3+
tokid | alias | description
4+
-------+-----------------+------------------------------------------
5+
1 | asciiword | Word, all ASCII
6+
2 | word | Word, all letters
7+
3 | numword | Word, letters and digits
8+
4 | email | Email address
9+
5 | url | URL
10+
6 | host | Host
11+
7 | sfloat | Scientific notation
12+
8 | version | Version number
13+
9 | hword_numpart | Hyphenated word part, letters and digits
14+
10 | hword_part | Hyphenated word part, all letters
15+
11 | hword_asciipart | Hyphenated word part, all ASCII
16+
12 | blank | Space symbols
17+
13 | tag | XML tag
18+
14 | protocol | Protocol head
19+
15 | numhword | Hyphenated word, letters and digits
20+
16 | asciihword | Hyphenated word, all ASCII
21+
17 | hword | Hyphenated word, all letters
22+
18 | url_path | URL path
23+
19 | file | File or path name
24+
20 | float | Decimal notation
25+
21 | int | Signed integer
26+
22 | uint | Unsigned integer
27+
23 | entity | XML entity
28+
(23 rows)
29+
30+
SELECT * FROM ts_parse('tsparser', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net teodor@123-stack.net 123_teodor@stack.net 123-teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
31+
/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234
32+
<i <b> wow < jqw <> qwerty');
33+
tokid | token
34+
-------+--------------------------------------
35+
22 | 345
36+
12 |
37+
1 | qwe
38+
12 | @
39+
19 | efd.r
40+
12 | '
41+
14 | http://
42+
6 | www.com
43+
12 | /
44+
14 | http://
45+
5 | aew.werc.ewr/?ad=qwe&dw
46+
6 | aew.werc.ewr
47+
18 | /?ad=qwe&dw
48+
12 |
49+
5 | 1aew.werc.ewr/?ad=qwe&dw
50+
6 | 1aew.werc.ewr
51+
18 | /?ad=qwe&dw
52+
12 |
53+
6 | 2aew.werc.ewr
54+
12 |
55+
14 | http://
56+
5 | 3aew.werc.ewr/?ad=qwe&dw
57+
6 | 3aew.werc.ewr
58+
18 | /?ad=qwe&dw
59+
12 |
60+
14 | http://
61+
6 | 4aew.werc.ewr
62+
12 |
63+
14 | http://
64+
5 | 5aew.werc.ewr:8100/?
65+
6 | 5aew.werc.ewr:8100
66+
18 | /?
67+
12 |
68+
1 | ad
69+
12 | =
70+
1 | qwe
71+
12 | &
72+
1 | dw
73+
12 |
74+
5 | 6aew.werc.ewr:8100/?ad=qwe&dw
75+
6 | 6aew.werc.ewr:8100
76+
18 | /?ad=qwe&dw
77+
12 |
78+
5 | 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32
79+
6 | 7aew.werc.ewr:8100
80+
18 | /?ad=qwe&dw=%20%32
81+
12 |
82+
7 | +4.0e-10
83+
12 |
84+
1 | qwe
85+
12 |
86+
1 | qwe
87+
12 |
88+
1 | qwqwe
89+
12 |
90+
20 | 234.435
91+
12 |
92+
22 | 455
93+
12 |
94+
20 | 5.005
95+
12 |
96+
4 | teodor@stack.net
97+
12 |
98+
4 | teodor@123-stack.net
99+
12 |
100+
4 | 123_teodor@stack.net
101+
12 |
102+
4 | 123-teodor@stack.net
103+
12 |
104+
16 | qwe-wer
105+
11 | qwe
106+
12 | -
107+
11 | wer
108+
12 |
109+
1 | asdf
110+
12 |
111+
13 | <fr>
112+
1 | qwer
113+
12 |
114+
1 | jf
115+
12 |
116+
1 | sdjk
117+
12 | <
118+
1 | we
119+
12 |
120+
1 | hjwer
121+
12 |
122+
13 | <werrwe>
123+
12 |
124+
3 | ewr1
125+
12 | >
126+
3 | ewri2
127+
12 |
128+
13 | <a href="qwe<qwe>">
129+
12 | +
130+
|
131+
19 | /usr/local/fff
132+
12 |
133+
19 | /awdf/dwqe/4325
134+
12 |
135+
19 | rewt/ewr
136+
12 |
137+
1 | wefjn
138+
12 |
139+
19 | /wqe-324/ewr
140+
12 |
141+
19 | gist.h
142+
12 |
143+
19 | gist.h.c
144+
12 |
145+
19 | gist.c
146+
12 | .
147+
1 | readline
148+
12 |
149+
20 | 4.2
150+
12 |
151+
20 | 4.2
152+
12 | .
153+
20 | 4.2
154+
12 | ,
155+
1 | readline
156+
20 | -4.2
157+
12 |
158+
1 | readline
159+
20 | -4.2
160+
12 | .
161+
22 | 234
162+
12 | +
163+
|
164+
12 | <
165+
1 | i
166+
12 |
167+
13 | <b>
168+
12 |
169+
1 | wow
170+
12 |
171+
12 | <
172+
1 | jqw
173+
12 |
174+
12 | <>
175+
1 | qwerty
176+
(139 rows)
177+

pg_tsparser--1.0.sql

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/* pg_tsparser/pg_tsparser--1.0.sql */
2+
3+
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
4+
\echo Use "CREATE EXTENSION pg_tsparser" to load this file. \quit
5+
6+
-- C-language function that the tsparser text search parser uses as its
-- START method. MODULE_PATHNAME is replaced with the module_pathname value
-- from pg_tsparser.control when the script is run by CREATE EXTENSION.
--
-- Plain CREATE (not CREATE OR REPLACE) is deliberate: an install script
-- must fail if a function with this signature already exists instead of
-- silently replacing or adopting an object it did not create.
CREATE FUNCTION tsparser_start(internal, int4)
RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE C STRICT;
10+
11+
-- C-language function that the tsparser text search parser uses as its
-- GETTOKEN method. MODULE_PATHNAME is replaced with the module_pathname
-- value from pg_tsparser.control when the script is run by CREATE EXTENSION.
--
-- Plain CREATE (not CREATE OR REPLACE) is deliberate: an install script
-- must fail if a function with this signature already exists instead of
-- silently replacing or adopting an object it did not create.
CREATE FUNCTION tsparser_nexttoken(internal, internal, internal)
RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE C STRICT;
15+
16+
-- C-language function that the tsparser text search parser uses as its
-- END method. MODULE_PATHNAME is replaced with the module_pathname value
-- from pg_tsparser.control when the script is run by CREATE EXTENSION.
--
-- Plain CREATE (not CREATE OR REPLACE) is deliberate: an install script
-- must fail if a function with this signature already exists instead of
-- silently replacing or adopting an object it did not create.
CREATE FUNCTION tsparser_end(internal)
RETURNS void
AS 'MODULE_PATHNAME'
LANGUAGE C STRICT;
20+
21+
-- C-language function that the tsparser text search parser uses as its
-- LEXTYPES method. MODULE_PATHNAME is replaced with the module_pathname
-- value from pg_tsparser.control when the script is run by CREATE EXTENSION.
--
-- Plain CREATE (not CREATE OR REPLACE) is deliberate: an install script
-- must fail if a function with this signature already exists instead of
-- silently replacing or adopting an object it did not create.
CREATE FUNCTION tsparser_lextype(internal)
RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE C STRICT;
25+
26+
-- C-language function that the tsparser text search parser uses as its
-- HEADLINE method. MODULE_PATHNAME is replaced with the module_pathname
-- value from pg_tsparser.control when the script is run by CREATE EXTENSION.
--
-- Plain CREATE (not CREATE OR REPLACE) is deliberate: an install script
-- must fail if a function with this signature already exists instead of
-- silently replacing or adopting an object it did not create.
CREATE FUNCTION tsparser_headline(internal, internal, tsquery)
RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE C STRICT;
30+
31+
-- Register the text search parser "tsparser", binding each parser method
-- to the C function declared for it earlier in this script.
CREATE TEXT SEARCH PARSER tsparser (
    START    = tsparser_start,
    GETTOKEN = tsparser_nexttoken,
    END      = tsparser_end,
    LEXTYPES = tsparser_lextype,
    HEADLINE = tsparser_headline
);

-- Catalog description attached to the parser.
COMMENT ON TEXT SEARCH PARSER tsparser IS 'parser for text search';

pg_tsparser.control

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# pg_tsparser extension
2+
comment = 'parser for text search'
3+
default_version = '1.0'
4+
module_pathname = '$libdir/pg_tsparser'
5+
relocatable = true

sql/pg_tsparser.sql

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
CREATE EXTENSION pg_tsparser;

SELECT * FROM ts_token_type('tsparser');

SELECT * FROM ts_parse('tsparser', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net teodor@123-stack.net 123_teodor@stack.net 123-teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234
<i <b> wow < jqw <> qwerty');

0 commit comments

Comments
 (0)