Is there a way to parse a Google search string into a table variable in T-SQL?

advertisements

Is there a way to parse a Google search string to a table variable in T-SQL?

By Google search string I mean, including the plus sign (require), minus sign (exclude), and exact phrase (double quotes) operators.

For example the following search string:

one -two +three "four five" -"six seven" +"eight nine" "ten eleven twelve"

Would be parsed to a table variable that I could use to generate a T-SQL where clause:

OPERATOR    STRING
            one
-           two
+           three
            four five
-           six seven
+           eight nine
            ten eleven twelve

Thanks!


Well parsers are written the same way in any language, using a state machine. Nothing like writting a short parser in the morning to get your sinapses lubricated:

declare @s varchar(max);
declare @t table (operator char(1) null, token varchar(max));

set @s = 'one -two +three "four five" -"six seven" +"eight nine" "ten eleven twelve"';

declare @state varchar(100);
declare @operator char(1);
declare @token varchar(max);
declare @c char(1);
declare @i int;

set @state = 'start';
set @i = -1;

while (1=1)
begin
  set @i = @i + 1;
  if (@i > len(@s))
    break;
  set @c = substring(@s, @i, 1);
  if (@state = 'start')
  begin
    if @c in ('-', '+')
    begin
      set @operator = @c;
      set @token = '';
      set @state = 'operator';
      continue;
    end
    else if @c = '"'
    begin
      set @operator = null;
      set @token = '';
      set @state = 'quote';
      continue;
    end
    else if (@c between 'a' and 'Z')
      or (@c between '0' and '9')
    begin
      set @operator = null;
      set @token = @c;
      set @state = 'token';
      continue;
    end
    else
      continue; -- ignore noise
  end
  else if @state = 'token'
  begin
    if (@c between 'a' and 'Z')
      or (@c between '0' and '9')
    begin
      set @token = @token + @c;
      continue;
    end
    else
    begin
      insert into @t (operator, token)
        values (@operator, @token);
      set @state = 'start';
      continue;
    end
  end
  else if @state = 'quote'
  begin
    if (@c != '"')
    begin
      set @token = @[email protected];
      continue;
    end
    else
    begin
      insert into @t (operator, token)
        values (@operator, @token);
      set @state = 'start';
      continue;
    end
  end
  else if @state = 'operator'
  begin
    if @c = '"'
    begin
      set @token = '';
      set @state = 'quote';
      continue;
    end
    else if (@c between 'a' and 'Z')
      or (@c between '0' and '9')
    begin
      set @token = @c;
      set @state = 'token';
      continue;
    end
    else
    begin
      -- consider raising error here, invalid char after operator +/-
      set @state = 'start';
      continue;
    end
  end
  else
    raiserror ('Unexpected state %s', 16,2, @state);
end
if @state = 'token'
begin
  insert into @t (operator, token)
        values (@operator, @token);
end
else if @state != 'start'
begin
  raiserror ('Incorrectly formatted string, must not end in state %s', 16, 1, @state);
end

select * from @t;